{"question": "How many traffic accidents were reported in the state of California in the last 6 months?", "schema": "CREATE TABLE traffic_accidents (id INT, date DATE, state VARCHAR(255)); INSERT INTO traffic_accidents (id, date, state) VALUES (1, '2022-01-01', 'California'), (2, '2022-01-15', 'California'), (3, '2022-02-01', 'California');", "sql": "SELECT COUNT(*) FROM traffic_accidents WHERE state = 'California' AND date > DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1}
{"question": "Which destination in Mexico had the highest increase in visitors from 2019 to 2022?", "schema": "CREATE TABLE mexico_tourism (destination VARCHAR(50), year INT, visitors INT); INSERT INTO mexico_tourism (destination, year, visitors) VALUES ('Cancun', 2019, 2000000), ('Cancun', 2022, 2500000), ('Puerto Vallarta', 2019, 1500000), ('Puerto Vallarta', 2022, 2000000);", "sql": "SELECT destination, MAX(visitors) - MIN(visitors) AS increase FROM mexico_tourism WHERE year IN (2019, 2022) GROUP BY destination ORDER BY increase DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1}
{"question": "What is the maximum salary for male employees hired before June 2021?", "schema": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(50), Gender VARCHAR(10), Salary FLOAT, HireDate DATE); INSERT INTO Employees (EmployeeID, Department, Gender, Salary, HireDate) VALUES (1, 'IT', 'Male', 85000, '2021-04-20'), (2, 'HR', 'Female', 75000, '2019-12-15'), (3, 'IT', 'Female', 80000, '2020-01-08'), (4, 'IT', 'Male', 90000, '2021-04-01'), (5, 'Finance', 'Male', 75000, '2019-12-28'), (6, 'IT', 'Male', 88000, '2021-05-12'), (7, 'Marketing', 'Female', 78000, '2021-07-01'), (8, 'HR', 'Male', 80000, '2021-02-15');", "sql": "SELECT MAX(Salary) FROM Employees WHERE Gender = 'Male' AND HireDate < '2021-06-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: WHich Name origin has a Longitude of 332.5e?", "schema": "CREATE TABLE table_name_94 (name VARCHAR, longitude VARCHAR)", "sql": "SELECT name AS origin FROM table_name_94 WHERE longitude = '332.5e';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "What is the maximum selling price of natural makeup products in France?", "schema": "CREATE TABLE MakeupProducts (product_id INT, product_name VARCHAR(255), price DECIMAL(5,2), is_natural BOOLEAN, country VARCHAR(50));", "sql": "SELECT MAX(price) FROM MakeupProducts WHERE is_natural = TRUE AND country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which organizations were founded in 1972, but became WOSM members until 1977?", "schema": "CREATE TABLE table_104858_1 (name_of_member_organization VARCHAR, year_member_organization_was_founded VARCHAR, year_current_scouting_organization_joined_wosm VARCHAR)", "sql": "SELECT name_of_member_organization FROM table_104858_1 WHERE year_member_organization_was_founded = '1972' AND year_current_scouting_organization_joined_wosm = '1977';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Away team Swindon Town had a Tie no listing of Replay with what as an Attendance?", "schema": "CREATE TABLE table_name_74 (attendance VARCHAR, tie_no VARCHAR, away_team VARCHAR)", "sql": "SELECT attendance FROM table_name_74 WHERE tie_no = 'replay' AND away_team = 'swindon town';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 112).", "schema": null, "sql": "CREATE SCHEMA testpub_rf_schema1;", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 541).", "schema": null, "sql": "INSERT INTO hats VALUES ('h7', 'blue') RETURNING *;", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 51, "num_statements": 1}
{"question": "Find the average mental health score of students in the 'Spring' semester", "schema": "CREATE TABLE student_mental_health (student_id INT, semester VARCHAR(10), mental_health_score INT); INSERT INTO student_mental_health (student_id, semester, mental_health_score) VALUES (1, 'Spring', 75), (2, 'Spring', 80), (3, 'Fall', 70);", "sql": "SELECT AVG(mental_health_score) FROM student_mental_health WHERE semester = 'Spring';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'isn' (example 18).", "schema": null, "sql": "SELECT '1234567890120'::EAN13;", "explanation": "Example query from the 'isn' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1}
{"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 171).", "schema": null, "sql": "SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel']);", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel'])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 118, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many years was the pageant miss globe international and delegate was karen loren medrano agustin?", "schema": "CREATE TABLE table_1825751_14 (year VARCHAR, pageant VARCHAR, delegate VARCHAR)", "sql": "SELECT COUNT(year) FROM table_1825751_14 WHERE pageant = 'Miss Globe International' AND delegate = 'Karen Loren Medrano Agustin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1}
{"question": "What is the average water consumption per household in the city of Seattle?", "schema": "CREATE TABLE Household (ID INT, City VARCHAR(20), Consumption FLOAT); INSERT INTO Household (ID, City, Consumption) VALUES (1, 'Seattle', 12.3), (2, 'NYC', 10.5), (3, 'Seattle', 13.8);", "sql": "SELECT AVG(Consumption) FROM Household WHERE City = 'Seattle';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "How many individuals have been served by access to justice initiatives in Europe since 2017?", "schema": "CREATE TABLE initiatives (initiative_id INT, year INT, individuals_served INT); INSERT INTO initiatives (initiative_id, year, individuals_served) VALUES (1, 2017, 2000), (2, 2018, 3000); CREATE TABLE locations (initiative_id INT, region VARCHAR(20)); INSERT INTO locations (initiative_id, region) VALUES (1, 'Europe'), (2, 'North America');", "sql": "SELECT SUM(initiatives.individuals_served) FROM initiatives INNER JOIN locations ON initiatives.initiative_id = locations.initiative_id WHERE locations.region = 'Europe' AND initiatives.year >= 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many draws have rumba/tango dance styles?", "schema": "CREATE TABLE table_name_79 (draw VARCHAR, dance_styles VARCHAR)", "sql": "SELECT COUNT(draw) FROM table_name_79 WHERE dance_styles = 'rumba/tango';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "What is the percentage of female faculty members in the university?", "schema": "CREATE TABLE university_faculty (id INT, gender VARCHAR(10)); INSERT INTO university_faculty (id, gender) VALUES (1, 'Female'), (2, 'Male'), (3, 'Male'), (4, 'Female'), (5, 'Female');", "sql": "SELECT ROUND(100.0 * SUM(CASE WHEN gender = 'Female' THEN 1 ELSE 0 END) / COUNT(*), 1) as pct_female_faculty;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is team 1 if Renova is team 2?", "schema": "CREATE TABLE table_name_99 (team_1 VARCHAR, team_2 VARCHAR)", "sql": "SELECT team_1 FROM table_name_99 WHERE team_2 = 'renova';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: how many matches did the player that played 23 matches win", "schema": "CREATE TABLE table_29302711_12 (matches_won INTEGER, matches_played VARCHAR)", "sql": "SELECT MIN(matches_won) FROM table_29302711_12 WHERE matches_played = 23;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "What is the total revenue generated by each product type, sorted by the total revenue in descending order?", "schema": "CREATE TABLE RevenueByProduct (product VARCHAR(255), revenue DECIMAL(10,2)); INSERT INTO RevenueByProduct (product, revenue) VALUES ('Flower', 50000), ('Concentrates', 35000), ('Edibles', 40000), ('Topicals', 25000);", "sql": "SELECT product, SUM(revenue) as total_revenue FROM RevenueByProduct GROUP BY product ORDER BY total_revenue DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the number of clubs before 2003 with a 4th place winner of Shenzhen Jianlibao?", "schema": "CREATE TABLE table_name_66 (number_of_clubs INTEGER, fourth_placed VARCHAR, season VARCHAR)", "sql": "SELECT AVG(number_of_clubs) FROM table_name_66 WHERE fourth_placed = 'shenzhen jianlibao' AND season < 2003;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1}
{"question": "How many incidents were recorded for Vessel3 between January 1, 2021 and June 30, 2021?", "schema": "CREATE TABLE VesselIncidents(IncidentID INT, VesselID INT, IncidentType TEXT, IncidentDate DATETIME); INSERT INTO VesselIncidents(IncidentID, VesselID, IncidentType, IncidentDate) VALUES (1, 3, 'Collision', '2021-03-15 14:30:00'), (2, 3, 'Mechanical Failure', '2021-05-02 08:00:00');", "sql": "SELECT COUNT(*) FROM VesselIncidents WHERE VesselID = 3 AND IncidentDate BETWEEN '2021-01-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the average number played of the team with 1 drawn and 24 against?", "schema": "CREATE TABLE table_name_79 (played INTEGER, drawn VARCHAR, against VARCHAR)", "sql": "SELECT AVG(played) FROM table_name_79 WHERE drawn = 1 AND against = 24;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the product, chromosome and porphyria related to the enzymes which take effect at the location 'Cytosol'?", "schema": "CREATE TABLE enzyme (product VARCHAR, chromosome VARCHAR, porphyria VARCHAR, LOCATION VARCHAR)", "sql": "SELECT product, chromosome, porphyria FROM enzyme WHERE LOCATION = 'Cytosol';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1}
{"question": "What is the total number of players who play games on each platform and in each city?", "schema": "CREATE TABLE Players (PlayerID INT, City VARCHAR(20), Platform VARCHAR(10)); INSERT INTO Players (PlayerID, City, Platform) VALUES (1, 'Tokyo', 'PC'), (2, 'Los Angeles', 'Console'), (3, 'New York', 'PC'), (4, 'Paris', 'VR'), (5, 'Tokyo', 'Console'), (6, 'Los Angeles', 'PC');", "sql": "SELECT City, Platform, COUNT(*) AS Count FROM Players GROUP BY City, Platform ORDER BY Count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'create_misc' (example 56).", "schema": null, "sql": "INSERT INTO f_star (class) VALUES ('f');", "explanation": "DML from PostgreSQL core regression test for Create Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1}
{"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 68).", "schema": null, "sql": "CREATE FUNCTION gbt_int4_distance(internal,int4,int2,oid,internal)\nRETURNS float8\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1}
{"question": "What is the total donation amount for each preservation project in each location?", "schema": "CREATE TABLE PreservationProjects (id INT, name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE, donation_amount FLOAT);", "sql": "SELECT p.location, p.name, SUM(p.donation_amount) FROM PreservationProjects p GROUP BY p.location, p.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1}
{"question": "How many 'high' severity security incidents were recorded in the last month for the 'finance' department?", "schema": "CREATE TABLE incidents (id INT, department VARCHAR(255), severity VARCHAR(255), incident_date DATE); INSERT INTO incidents (id, department, severity, incident_date) VALUES (1, 'finance', 'high', '2022-04-15'), (2, 'IT', 'medium', '2022-02-20'), (3, 'finance', 'medium', '2022-03-05'); SELECT CURDATE(), DATE_SUB(CURDATE(), INTERVAL 1 MONTH) INTO @current_date, @start_date; SELECT COUNT(*) FROM incidents WHERE department = 'finance' AND severity = 'high' AND incident_date BETWEEN @start_date AND @current_date;", "sql": "SELECT COUNT(*) FROM incidents WHERE department = 'finance' AND severity = 'high' AND incident_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 1 MONTH) AND CURDATE();", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1}
{"question": "What is the total transaction value for each month of the year 2022?", "schema": "CREATE TABLE transactions (transaction_id INT, transaction_date DATE, transaction_category VARCHAR(255), transaction_value DECIMAL(10,2)); INSERT INTO transactions (transaction_id, transaction_date, transaction_category, transaction_value) VALUES (1, '2022-01-02', 'Food', 75.00), (2, '2022-02-05', 'Electronics', 350.00), (3, '2022-03-10', 'Clothing', 200.00);", "sql": "SELECT YEAR(transaction_date) as year, MONTH(transaction_date) as month, SUM(transaction_value) as total_value FROM transactions WHERE transaction_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY year, month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1}
{"question": "What is the total amount of funding received by 'community_development' table where the 'community_name' is 'peace_village'?", "schema": "CREATE TABLE community_development (id INT, community_name TEXT, community_size INT, region TEXT, funding FLOAT);", "sql": "SELECT SUM(funding) FROM community_development WHERE community_name = 'peace_village';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the score of the game on November 9 when Atlanta was the visiting team?", "schema": "CREATE TABLE table_name_73 (score VARCHAR, visitor VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_73 WHERE visitor = 'atlanta' AND date = 'november 9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the attendance of the bowl game in Gainesville, Fl?", "schema": "CREATE TABLE table_15190346_2 (attendance VARCHAR, location VARCHAR)", "sql": "SELECT attendance FROM table_15190346_2 WHERE location = 'Gainesville, FL';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "List the names and types of all military technologies that were developed in the last 5 years.", "schema": "CREATE TABLE military_technology (id INT, name VARCHAR(255), technology_type VARCHAR(255), development_date DATE);INSERT INTO military_technology (id, name, technology_type, development_date) VALUES (1, 'F-35', 'Fighter Jet', '2010-01-01'), (2, 'M1 Abrams', 'Tank', '2005-01-01'), (3, 'Patriot', 'Missile System', '2015-01-01');", "sql": "SELECT name, technology_type FROM military_technology WHERE YEAR(development_date) >= 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1}
{"question": "How many packages were shipped from Colombia to Brazil in Q1 2022?", "schema": "CREATE TABLE packages (package_id INT, origin_country VARCHAR(255), destination_country VARCHAR(255), shipped_qty INT, shipped_quarter INT, shipped_year INT); INSERT INTO packages (package_id, origin_country, destination_country, shipped_qty, shipped_quarter, shipped_year) VALUES (1, 'Colombia', 'Brazil', 250, 1, 2022), (2, 'Brazil', 'Argentina', 300, 1, 2022), (3, 'Chile', 'Brazil', 350, 1, 2022);", "sql": "SELECT SUM(shipped_qty) as total_shipped FROM packages WHERE origin_country = 'Colombia' AND destination_country = 'Brazil' AND shipped_quarter = 1 AND shipped_year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the ga date of the t5120 model?", "schema": "CREATE TABLE table_10818465_1 (ga_date VARCHAR, model VARCHAR)", "sql": "SELECT ga_date FROM table_10818465_1 WHERE model = 'T5120';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many were written by Peter Winther?", "schema": "CREATE TABLE table_20704243_3 (written_by VARCHAR, directed_by VARCHAR)", "sql": "SELECT COUNT(written_by) FROM table_20704243_3 WHERE directed_by = 'Peter Winther';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the name of the race on 9 July?", "schema": "CREATE TABLE table_name_72 (name VARCHAR, date VARCHAR)", "sql": "SELECT name FROM table_name_72 WHERE date = '9 july';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 77).", "schema": null, "sql": "SELECT '[1,2]'::jsonb @> '[1,2,2]'::jsonb;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[1,2]'::jsonb @> '[1,2,2]'::jsonb) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 42, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Find the country of the airlines whose name starts with 'Orbit'.", "schema": "CREATE TABLE airlines (country VARCHAR, name VARCHAR)", "sql": "SELECT country FROM airlines WHERE name LIKE 'Orbit%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Bronze has a Rank of 4, and a Gold smaller than 0?", "schema": "CREATE TABLE table_name_7 (bronze INTEGER, rank VARCHAR, gold VARCHAR)", "sql": "SELECT MAX(bronze) FROM table_name_7 WHERE rank = 4 AND gold < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who did wilson reis fight against that lasted less than 3 rounds with a time of 1:02?", "schema": "CREATE TABLE table_name_25 (opponent VARCHAR, round VARCHAR, time VARCHAR)", "sql": "SELECT opponent FROM table_name_25 WHERE round < 3 AND time = '1:02';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the opponent at the game that had a loss of Travers (0–2)?", "schema": "CREATE TABLE table_name_9 (opponent VARCHAR, loss VARCHAR)", "sql": "SELECT opponent FROM table_name_9 WHERE loss = 'travers (0–2)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Time/Retired has less than 77 laps, and a Grid of 16?", "schema": "CREATE TABLE table_name_68 (time_retired VARCHAR, laps VARCHAR, grid VARCHAR)", "sql": "SELECT time_retired FROM table_name_68 WHERE laps < 77 AND grid = 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1}
{"question": "What is the total amount of climate finance invested in renewable energy in Africa?", "schema": "CREATE TABLE ClimateFinance (Country TEXT, Sector TEXT, Investment_Amount NUMERIC); INSERT INTO ClimateFinance (Country, Sector, Investment_Amount) VALUES ('South Africa', 'Renewable Energy', 5000000), ('Kenya', 'Renewable Energy', 2000000), ('Nigeria', 'Renewable Energy', 7000000);", "sql": "SELECT SUM(Investment_Amount) FROM ClimateFinance WHERE Country IN ('South Africa', 'Kenya', 'Nigeria') AND Sector = 'Renewable Energy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1}
{"question": "What is the average number of players per multiplayer game in Europe?", "schema": "CREATE TABLE Games (GameID INT, GameType VARCHAR(255), Multiplayer INT); INSERT INTO Games (GameID, GameType, Multiplayer) VALUES (1, 'Racing', 0); INSERT INTO Games (GameID, GameType, Multiplayer) VALUES (2, 'Shooter', 1); CREATE TABLE Players (PlayerID INT, GameID INT); INSERT INTO Players (PlayerID, GameID) VALUES (1, 1); INSERT INTO Players (PlayerID, GameID) VALUES (1, 2); INSERT INTO Players (PlayerID, GameID) VALUES (2, 2); INSERT INTO Players (PlayerID, GameID) VALUES (3, 2); INSERT INTO Players (PlayerID, GameID) VALUES (4, 2); INSERT INTO Players (PlayerID, GameID) VALUES (5, 2); INSERT INTO Players (PlayerID, GameID) VALUES (6, 2); INSERT INTO Players (PlayerID, GameID) VALUES (7, 2); INSERT INTO Players (PlayerID, GameID) VALUES (8, 2); INSERT INTO Players (PlayerID, GameID) VALUES (9, 2); INSERT INTO Players (PlayerID, GameID) VALUES (10, 2);", "sql": "SELECT AVG(CountPlayers) FROM (SELECT GameID, COUNT(PlayerID) AS CountPlayers FROM Players INNER JOIN Games ON Players.GameID = Games.GameID WHERE Games.Multiplayer = 1 GROUP BY GameID) AS Subquery WHERE EXISTS (SELECT GameID FROM Games WHERE ReleaseCountry LIKE '%Europe%' AND Games.GameID = Subquery.GameID);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 310, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the score when the team is milwaukee?", "schema": "CREATE TABLE table_27734769_8 (score VARCHAR, team VARCHAR)", "sql": "SELECT score FROM table_27734769_8 WHERE team = 'Milwaukee';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the result of the game played on April 16 with Philadelphia as home team?", "schema": "CREATE TABLE table_name_64 (result VARCHAR, home_team VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_64 WHERE home_team = 'philadelphia' AND date = 'april 16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "Calculate the percentage of unresolved vulnerabilities by region", "schema": "CREATE TABLE vulnerabilities (id INT, region VARCHAR(10), status VARCHAR(10)); INSERT INTO vulnerabilities (id, region, status) VALUES (1, 'EMEA', 'Resolved'), (2, 'APAC', 'Unresolved'), (3, 'AMER', 'Resolved'), (4, 'EMEA', 'Resolved'), (5, 'APAC', 'Resolved'), (6, 'AMER', 'Unresolved');", "sql": "SELECT region, 100.0 * SUM(CASE WHEN status = 'Unresolved' THEN 1 ELSE 0 END) / COUNT(*) as unresolved_percentage FROM vulnerabilities GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What are the points for ktm-vmc equipment?", "schema": "CREATE TABLE table_16729457_18 (points VARCHAR, equipment VARCHAR)", "sql": "SELECT points FROM table_16729457_18 WHERE equipment = 'KTM-VMC';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "Identify the menu items that were 86'ed (ran out) before 5pm on a specific day for the 'Gourmet Greens' restaurant.", "schema": "CREATE TABLE menu_items (id INT, restaurant_id INT, name VARCHAR(50), available INT, available_time TIME); INSERT INTO menu_items (id, restaurant_id, name, available, available_time) VALUES (1, 3, 'Kale Salad', 10, '12:00:00'), (2, 3, 'Beet Burger', 15, '12:00:00'), (3, 3, 'Carrot Fries', 20, '12:00:00'), (4, 3, 'Quinoa Bowl', 30, '12:00:00');", "sql": "SELECT name FROM menu_items WHERE restaurant_id = 3 AND available_time < '17:00:00' GROUP BY name HAVING SUM(available) = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1}
{"question": "Show a SQL definition from the timescaledb project (create_table_with, item 19).", "schema": null, "sql": "CREATE TABLE t2(time timestamptz, device text, value float) WITH (timescaledb.nonexistent_param = false);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: For which Game 4 did Michael O'Connor play wing position?", "schema": "CREATE TABLE table_name_8 (game_4 VARCHAR, position VARCHAR, game_1 VARCHAR)", "sql": "SELECT game_4 FROM table_name_8 WHERE position = 'wing' AND game_1 = 'michael o'connor';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "What is the number of unique users who have streamed music from each genre?", "schema": "CREATE TABLE user_genre_streams (stream_id int, user_id int, timestamp datetime, genre varchar(255)); INSERT INTO user_genre_streams (stream_id, user_id, timestamp, genre) VALUES (1, 123, '2022-01-01 10:00:00', 'Rock');", "sql": "SELECT genre, COUNT(DISTINCT user_id) as unique_users FROM user_genre_streams WHERE timestamp BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1}
{"question": "Insert data into the 'auto_show' table", "schema": "CREATE TABLE auto_show (id INT PRIMARY KEY, show_name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE);", "sql": "INSERT INTO auto_show (id, show_name, location, start_date, end_date) VALUES (1, 'Paris Motor Show', 'Paris, France', '2023-10-01', '2023-10-15');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What are all the name(namesakes) for the entire chart?", "schema": "CREATE TABLE table_29860752_11 (name__namesake_ VARCHAR)", "sql": "SELECT name__namesake_ FROM table_29860752_11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many entries are shown for date of successors formal installation where successor is john w. walker (dr)?", "schema": "CREATE TABLE table_225099_3 (date_of_successors_formal_installation VARCHAR, successor VARCHAR)", "sql": "SELECT COUNT(date_of_successors_formal_installation) FROM table_225099_3 WHERE successor = 'John W. Walker (DR)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Tell me the highest total when the horse is spender s", "schema": "CREATE TABLE table_name_99 (total INTEGER, horse VARCHAR)", "sql": "SELECT MAX(total) FROM table_name_99 WHERE horse = 'spender s';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which nation has a Bronze and Silver smaller than 1 and a Gold larger than 1?", "schema": "CREATE TABLE table_name_87 (nation VARCHAR, gold VARCHAR, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT nation FROM table_name_87 WHERE bronze < 1 AND silver < 1 AND gold > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1}
{"question": "Find healthcare providers with low cultural competency scores serving Latinx in FL.", "schema": "CREATE TABLE healthcare_providers (provider_id INT, name TEXT, state TEXT); INSERT INTO healthcare_providers (provider_id, name, state) VALUES (1, 'Dr. Ana Perez', 'FL'); CREATE TABLE cultural_competency (provider_id INT, score INT, community TEXT);", "sql": "SELECT h.name, c.score FROM healthcare_providers h INNER JOIN cultural_competency c ON h.provider_id = c.provider_id WHERE h.state = 'FL' AND c.community = 'Latinx' AND c.score < 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1}
{"question": "What is the average order size for each salesperson?", "schema": "CREATE TABLE salesperson (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO salesperson (id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE orders (id INT, salesperson_id INT, size INT); INSERT INTO orders (id, salesperson_id, size) VALUES (1, 1, 10), (2, 1, 15), (3, 2, 20), (4, 2, 25);", "sql": "SELECT salesperson_id, AVG(size) as avg_order_size FROM orders GROUP BY salesperson_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1}
{"question": "pgTAP test for Valueset (assertion 230).", "schema": null, "sql": "/****************************************************************************/\n-- Now test set_eq().\n\nSELECT * FROM check_test(\n set_ne( 'anames', 'expect' ),\n false,\n 'set_ne(prepared, prepared) fail',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Valueset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1}
{"question": "What is the number of circular economy initiatives for each country in 2020?", "schema": "CREATE TABLE circular_economy (country VARCHAR(255), year INT, initiatives INT); INSERT INTO circular_economy (country, year, initiatives) VALUES ('USA', 2020, 15), ('Canada', 2020, 10), ('Mexico', 2020, 12);", "sql": "SELECT c.country, COUNT(c.initiatives) as num_initiatives FROM circular_economy c WHERE c.year = 2020 GROUP BY c.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1}
{"question": "What is the number of food safety violations per category?", "schema": "CREATE TABLE Menu_Categories (Category_ID INT, Category_Name TEXT); INSERT INTO Menu_Categories (Category_ID, Category_Name) VALUES (1, 'Main Dishes'), (2, 'Sides'); CREATE TABLE Menu_Items (Item_ID INT, Item_Name TEXT, Category_ID INT); INSERT INTO Menu_Items (Item_ID, Item_Name, Category_ID) VALUES (1, 'Burger', 1), (2, 'Pizza', 1), (3, 'Fries', 2), (4, 'Salad', 2); CREATE TABLE Inspections (Inspection_ID INT, Item_ID INT, Violation_Count INT); INSERT INTO Inspections (Inspection_ID, Item_ID, Violation_Count) VALUES (1, 1, 3), (2, 1, 2), (3, 2, 1), (4, 2, 0), (5, 3, 1), (6, 4, 0);", "sql": "SELECT MC.Category_Name, SUM(I.Violation_Count) as Total_Violations FROM Inspections I JOIN Menu_Items MI ON I.Item_ID = MI.Item_ID JOIN Menu_Categories MC ON MI.Category_ID = MC.Category_ID GROUP BY MC.Category_Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 217, "num_statements": 1}
{"question": "Find the total revenue of organic haircare products in the Asian market for the current year.", "schema": "CREATE TABLE sales(product_id INT, sale_date DATE, revenue DECIMAL(10,2), country VARCHAR(50)); INSERT INTO sales VALUES (11, '2021-06-15', 50.00, 'CN'); INSERT INTO sales VALUES (12, '2021-07-16', 60.00, 'JP'); CREATE TABLE products(product_id INT, product_name VARCHAR(50), is_organic BOOLEAN, product_category VARCHAR(50)); INSERT INTO products VALUES (11, 'Rosemary Shampoo', TRUE, 'Haircare'); INSERT INTO products VALUES (12, 'Lavender Conditioner', TRUE, 'Haircare');", "sql": "SELECT SUM(sales.revenue) as total_revenue FROM sales JOIN products ON sales.product_id = products.product_id WHERE products.is_organic = TRUE AND sales.country = 'Asia' AND YEAR(sales.sale_date) = YEAR(CURDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1}
{"question": "Which aircraft has the most accidents in a specific region?", "schema": "CREATE TABLE aircrafts (aircraft_id INT, model VARCHAR(50), region VARCHAR(50)); INSERT INTO aircrafts (aircraft_id, model, region) VALUES (1, 'Boeing 747', 'North America'), (2, 'Airbus A320', 'Europe'), (3, 'Boeing 737', 'Asia'); CREATE TABLE accidents (accident_id INT, aircraft_id INT, date DATE); INSERT INTO accidents (accident_id, aircraft_id) VALUES (1, 1), (2, 1), (3, 3), (4, 2), (5, 2);", "sql": "SELECT a.model, COUNT(*) as num_accidents FROM aircrafts a JOIN accidents b ON a.aircraft_id = b.aircraft_id WHERE a.region = 'North America' GROUP BY a.model ORDER BY num_accidents DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1}
{"question": "Write the PL/pgSQL object from PostgreSQL regression test 'encoding' (example 1).", "schema": null, "sql": "/* skip test if not UTF8 server encoding */\nSELECT getdatabaseencoding() <> 'UTF8' AS skip_test \\gset\n\\if :skip_test\n\\quit\n\\endif\n\n\\getenv libdir PG_LIBDIR\n\\getenv dlsuffix PG_DLSUFFIX\n\n\\set regresslib :libdir '/regress' :dlsuffix\n\nCREATE FUNCTION test_bytea_to_text(bytea) RETURNS text\n AS :'regresslib' LANGUAGE C STRICT;", "explanation": "PL/pgSQL object from PostgreSQL core test for Encoding.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 326, "num_statements": 1}
{"question": "pgTAP test for Resultset (assertion 443).", "schema": null, "sql": "DECLARE cwant CURSOR FOR SELECT id, name FROM names WHERE name like 'An%' ORDER BY id;", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1}
{"question": "What is the total weight loss in pounds for members who have lost weight since they joined?", "schema": "CREATE TABLE health_metrics (member_id INT, weight_loss_pounds FLOAT, last_checked DATE); INSERT INTO health_metrics (member_id, weight_loss_pounds, last_checked) VALUES (1, 3, '2021-01-15'), (2, 7, '2022-03-28');", "sql": "SELECT SUM(weight_loss_pounds) FROM health_metrics JOIN members ON health_metrics.member_id = members.member_id WHERE health_metrics.weight_loss_pounds > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the average overall pick number for the USF player who was picked after round 5?", "schema": "CREATE TABLE table_name_23 (overall INTEGER, college VARCHAR, round VARCHAR)", "sql": "SELECT AVG(overall) FROM table_name_23 WHERE college = 'usf' AND round > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who directed the episode written cyrus nowrasteh?", "schema": "CREATE TABLE table_name_85 (directed_by VARCHAR, written_by VARCHAR)", "sql": "SELECT directed_by FROM table_name_85 WHERE written_by = 'cyrus nowrasteh';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'citext' (example 354).", "schema": null, "sql": "SELECT regexp_matches('foobarbequebaz'::citext, '(bar)(beque)') = ARRAY[ 'bar', 'beque' ] AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: who is the opponent where tv is abc and game site is tampa stadium", "schema": "CREATE TABLE table_11406866_2 (opponent VARCHAR, tv VARCHAR, game_site VARCHAR)", "sql": "SELECT opponent FROM table_11406866_2 WHERE tv = 'ABC' AND game_site = 'Tampa Stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1}
{"question": "How many local businesses have partnered with hotels in Japan and South Korea?", "schema": "CREATE TABLE BusinessPartnerships(id INT, business_id INT, hotel_id INT); INSERT INTO BusinessPartnerships(id, business_id, hotel_id) VALUES (1, 1, 1), (2, 2, 2), (3, 3, null), (6, 4, 3), (7, 5, 4); CREATE TABLE LocalBusinesses(id INT, name TEXT, country TEXT); INSERT INTO LocalBusinesses(id, name, country) VALUES (1, 'Surf Shop', 'Japan'), (2, 'Coffee House', 'Japan'), (3, 'Art Gallery', 'Japan'), (4, 'Ramen Shop', 'South Korea'), (5, 'Tea House', 'South Korea'); CREATE TABLE Hotels(id INT, name TEXT, country TEXT); INSERT INTO Hotels(id, name, country) VALUES (1, 'Ocean View', 'Japan'), (2, 'Harbor Inn', 'Japan'), (3, 'Seoul Palace', 'South Korea'), (4, 'Kyoto Garden', 'South Korea');", "sql": "SELECT COUNT(*) FROM BusinessPartnerships JOIN LocalBusinesses ON BusinessPartnerships.business_id = LocalBusinesses.id JOIN Hotels ON BusinessPartnerships.hotel_id = Hotels.id WHERE LocalBusinesses.country IN ('Japan', 'South Korea') AND Hotels.country IN ('Japan', 'South Korea');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 282, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: For a team 2 of Al-Faisaly, what was the 2nd leg?", "schema": "CREATE TABLE table_name_96 (team_2 VARCHAR)", "sql": "SELECT 2 AS nd_leg FROM table_name_96 WHERE team_2 = 'al-faisaly';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "What is the average carbon sequestration value in the 'central_region'?", "schema": "CREATE TABLE carbon_sequestration (id INT, region VARCHAR(50), value FLOAT); INSERT INTO carbon_sequestration (id, region, value) VALUES (1, 'Central Region', 56.78); INSERT INTO carbon_sequestration (id, region, value) VALUES (2, 'Northern Region', 34.56);", "sql": "SELECT AVG(value) FROM carbon_sequestration WHERE region = 'Central Region';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What's the lane with a time of 1:00.66?", "schema": "CREATE TABLE table_name_93 (lane INTEGER, time VARCHAR)", "sql": "SELECT AVG(lane) FROM table_name_93 WHERE time = '1:00.66';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 539).", "schema": null, "sql": "$$ language plpgsql;\n\nselect stacked_diagnostics_test();\n\ndrop function stacked_diagnostics_test();\n\n-- test variadic functions\n\ncreate or replace function vari(variadic int[])\nreturns void as $$\nbegin\n for i in array_lower($1,1)..array_upper($1,1) loop\n raise notice '%', $1[i];", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 283, "num_statements": 4}
{"question": "Generate PostgreSQL SQL for: Which To par has a Place in t9 and a Country of Spain?", "schema": "CREATE TABLE table_name_55 (to_par VARCHAR, place VARCHAR, country VARCHAR)", "sql": "SELECT to_par FROM table_name_55 WHERE place = 't9' AND country = 'spain';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1}
{"question": "What is the number of countries in the Latin America and Caribbean region that have reduced their carbon emissions in the last 5 years?", "schema": "CREATE TABLE country_emissions (name VARCHAR(50), region VARCHAR(50), year INT, carbon_emissions INT); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 1', 'Latin America and Caribbean', 2017, 10000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 1', 'Latin America and Caribbean', 2018, 9000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 1', 'Latin America and Caribbean', 2019, 8000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 2', 'Latin America and Caribbean', 2017, 15000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 2', 'Latin America and Caribbean', 2018, 14000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 2', 'Latin America and Caribbean', 2019, 13000);", "sql": "SELECT region, COUNT(*) FROM country_emissions WHERE region = 'Latin America and Caribbean' AND carbon_emissions < (SELECT carbon_emissions FROM country_emissions WHERE name = 'Country 1' AND year = 2017 AND region = 'Latin America and Caribbean' ORDER BY year DESC LIMIT 1) GROUP BY region HAVING COUNT(*) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 311, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is McCains percent when Obamas is 39.13%", "schema": "CREATE TABLE table_20688030_1 (mccain_number VARCHAR, obama_percentage VARCHAR)", "sql": "SELECT COUNT(mccain_number) FROM table_20688030_1 WHERE obama_percentage = '39.13%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the lowest size of the school with titans as the mascot?", "schema": "CREATE TABLE table_name_93 (size INTEGER, mascot VARCHAR)", "sql": "SELECT MIN(size) FROM table_name_93 WHERE mascot = 'titans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Delete any soil moisture readings that are older than 30 days.", "schema": "CREATE TABLE Soil_Moisture (ID INT, Moisture FLOAT, Timestamp DATETIME); INSERT INTO Soil_Moisture (ID, Moisture, Timestamp) VALUES (1, 45, '2022-01-01 10:00:00'), (2, 52, '2022-01-15 12:00:00');", "sql": "DELETE FROM Soil_Moisture WHERE Timestamp < NOW() - INTERVAL '30 days';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'citext' (example 330).", "schema": null, "sql": "SELECT btrim('xyxtrimyyx'::citext, 'xy'::text ) = 'trim' AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What Winning driver has a Winning constructor of talbot?", "schema": "CREATE TABLE table_name_59 (winning_driver VARCHAR, winning_constructor VARCHAR)", "sql": "SELECT winning_driver FROM table_name_59 WHERE winning_constructor = 'talbot';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1}
{"question": "What is the total quantity of recycled materials used by each brand?", "schema": "CREATE TABLE Brands (BrandID INT, BrandName VARCHAR(50)); INSERT INTO Brands (BrandID, BrandName) VALUES (1, 'BrandX'), (2, 'BrandY'), (3, 'BrandZ'); CREATE TABLE Products (ProductID INT, ProductName VARCHAR(50), BrandID INT, RecycledMaterials INT); INSERT INTO Products (ProductID, ProductName, BrandID, RecycledMaterials) VALUES (1, 'ProductA', 1, 25), (2, 'ProductB', 1, 30), (3, 'ProductC', 2, 15), (4, 'ProductD', 2, 0), (5, 'ProductE', 3, 35), (6, 'ProductF', 3, 40);", "sql": "SELECT BrandName, SUM(RecycledMaterials) as TotalRecycledMaterials FROM Brands b JOIN Products p ON b.BrandID = p.BrandID GROUP BY BrandName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1}
{"question": "What is the average age of patients who identified as LGBTQ+ and have been diagnosed with anxiety?", "schema": "CREATE TABLE patients (id INT PRIMARY KEY, name VARCHAR(255), age INT, gender VARCHAR(50), ethnicity VARCHAR(255), condition VARCHAR(255)); INSERT INTO patients (id, name, age, gender, ethnicity, condition) VALUES (1, 'Alex Johnson', 30, 'Male', 'Latinx', 'Anxiety');", "sql": "SELECT AVG(age) as average_age FROM patients WHERE ethnicity LIKE '%LGBTQ+%' AND condition = 'Anxiety';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1}
{"question": "How many cases were handled by each attorney in the last year?", "schema": "CREATE TABLE cases (case_id INT, attorney_name VARCHAR(255), case_date DATE); INSERT INTO cases (case_id, attorney_name, case_date) VALUES (1, 'Smith', '2020-01-01'), (2, 'Jones', '2020-05-15'), (3, 'Jones', '2021-07-20'), (4, 'Smith', '2020-12-31'), (5, 'Brown', '2020-06-20');", "sql": "SELECT attorney_name, COUNT(*) FROM cases WHERE case_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY attorney_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1}
{"question": "Insert a new product with id 8 from 'Italy'", "schema": "CREATE TABLE products (id INT PRIMARY KEY, name VARCHAR(100), category VARCHAR(50), price DECIMAL(5,2));", "sql": "INSERT INTO products (id, name, category, price) VALUES (8, 'Parmigiano Reggiano', 'Cheese', 29.99, 'Italy');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1}
{"question": "PostgreSQL regression test 'inet': Write the SELECT query (example 97).", "schema": null, "sql": "SELECT '127.0.0.2'::inet - ('127.0.0.2'::inet + 500);", "explanation": "Regression test for Inet in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '127.0.0.2'::inet - ('127.0.0.2'::inet + 500)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1}
{"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 252).", "schema": null, "sql": "SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How old was the person born 24 September 1851 at the time of disaster?", "schema": "CREATE TABLE table_name_81 (age_at_time_of_disaster VARCHAR, date_of_birth VARCHAR)", "sql": "SELECT age_at_time_of_disaster FROM table_name_81 WHERE date_of_birth = '24 september 1851';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1}
{"question": "Which menu items have a high calorie count and are also bestsellers?", "schema": "CREATE TABLE MenuItems (id INT, item VARCHAR(30), calories INT, popularity INT); INSERT INTO MenuItems (id, item, calories, popularity) VALUES (1, 'Cheese Burger', 600, 100), (2, 'Garden Salad', 200, 50);", "sql": "SELECT item, calories FROM MenuItems WHERE popularity > 70 ORDER BY calories DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which party belongs to district 41, and is delegated by Jill P. Carter?", "schema": "CREATE TABLE table_name_61 (party VARCHAR, district VARCHAR, delegate VARCHAR)", "sql": "SELECT party FROM table_name_61 WHERE district = 41 AND delegate = 'jill p. carter';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How much money does the player with a score of 76-70-65-68=279 have?", "schema": "CREATE TABLE table_name_24 (money___$__ VARCHAR, score VARCHAR)", "sql": "SELECT money___$__ FROM table_name_24 WHERE score = 76 - 70 - 65 - 68 = 279;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the TO par for the player who scored 70-68-74-70=282?", "schema": "CREATE TABLE table_name_26 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_26 WHERE score = 70 - 68 - 74 - 70 = 282;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the constructor for the VII Race of Champions?", "schema": "CREATE TABLE table_name_34 (constructor VARCHAR, race_name VARCHAR)", "sql": "SELECT constructor FROM table_name_34 WHERE race_name = 'vii race of champions';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who is the minister with a term end on 10 March 1974?", "schema": "CREATE TABLE table_name_71 (minister VARCHAR, term_end VARCHAR)", "sql": "SELECT minister FROM table_name_71 WHERE term_end = '10 march 1974';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 9).", "schema": null, "sql": "SELECT * FROM test_float8 WHERE i>1::float8 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Where was there a result of 4-5 and a score of 1-1?", "schema": "CREATE TABLE table_name_98 (venue VARCHAR, result VARCHAR, score VARCHAR)", "sql": "SELECT venue FROM table_name_98 WHERE result = '4-5' AND score = '1-1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was zach johnson's score to par?", "schema": "CREATE TABLE table_275162_1 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_275162_1 WHERE player = 'Zach Johnson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Date, when the Sport is luge - men's doubles, and when the Record is, \"start\"?", "schema": "CREATE TABLE table_name_90 (date VARCHAR, sport VARCHAR, record VARCHAR)", "sql": "SELECT date FROM table_name_90 WHERE sport = 'luge - men's doubles' AND record = 'start';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1}
{"question": "Find the number of Mars rovers with a mass greater than 500 kg", "schema": "CREATE TABLE rovers (id INT, name VARCHAR(50), mass INT, manufacturer VARCHAR(50));", "sql": "SELECT COUNT(*) FROM rovers WHERE mass > 500 AND planet = 'Mars';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the total top-25 of the U.S. Open, which has less than 7 cuts?", "schema": "CREATE TABLE table_name_84 (top_25 INTEGER, tournament VARCHAR, cuts_made VARCHAR)", "sql": "SELECT SUM(top_25) FROM table_name_84 WHERE tournament = 'u.s. open' AND cuts_made < 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many different titles does the representative whose mission was terminated on August 5, 1984 have?", "schema": "CREATE TABLE table_20065425_1 (title VARCHAR, termination_of_mission VARCHAR)", "sql": "SELECT COUNT(title) FROM table_20065425_1 WHERE termination_of_mission = 'August 5, 1984';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What are the distinct positions of the players from a country whose capital is Dublin?", "schema": "CREATE TABLE country (Country_id VARCHAR, Capital VARCHAR); CREATE TABLE match_season (Position VARCHAR, Country VARCHAR)", "sql": "SELECT DISTINCT T2.Position FROM country AS T1 JOIN match_season AS T2 ON T1.Country_id = T2.Country WHERE T1.Capital = 'Dublin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1}
{"question": "List all rural infrastructure projects in India and their respective start dates.", "schema": "CREATE TABLE rural_infrastructure_projects (id INT, project_name VARCHAR(50), country VARCHAR(50), start_date DATE); INSERT INTO rural_infrastructure_projects (id, project_name, country, start_date) VALUES (1, 'Rajiv Gandhi Rural Electrification Program', 'India', '2010-04-01'), (2, 'BharatNet Rural Broadband Initiative', 'India', '2015-07-26');", "sql": "SELECT project_name, start_date FROM rural_infrastructure_projects WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1}
{"question": "List broadband subscribers who joined after mobile subscribers with the same region_id.", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, region_id INT, join_date DATE); INSERT INTO mobile_subscribers (subscriber_id, region_id, join_date) VALUES (1, 1, '2021-01-01'), (2, 2, '2021-03-01'), (3, 3, '2021-02-01'), (4, 4, '2021-04-01'), (5, 1, '2021-01-15'), (6, 2, '2021-03-15'), (7, 3, '2021-02-15'), (8, 4, '2021-04-15'); CREATE TABLE broadband_subscribers (subscriber_id INT, region_id INT, join_date DATE); INSERT INTO broadband_subscribers (subscriber_id, region_id, join_date) VALUES (9, 1, '2021-01-20'), (10, 2, '2021-03-20'), (11, 3, '2021-02-20'), (12, 4, '2021-04-20'), (13, 1, '2021-01-25'), (14, 2, '2021-03-25'), (15, 3, '2021-02-25'), (16, 4, '2021-04-25');", "sql": "SELECT b.* FROM broadband_subscribers b INNER JOIN mobile_subscribers m ON b.region_id = m.region_id WHERE b.join_date > m.join_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1}
{"question": "What is the count of unique accommodations for 'MobilityAssistanceDog' in the 'ServiceAnimalAccommodations' table?", "schema": "CREATE TABLE ServiceAnimalAccommodations (service_animal_id INT, accommodation_type VARCHAR(255)); INSERT INTO ServiceAnimalAccommodations (service_animal_id, accommodation_type) VALUES (1001, 'VisualAssistanceDog'), (1002, 'HearingDog'), (1003, 'MobilityAssistanceDog'), (1004, 'MobilityAssistanceDog'), (1005, 'VisualAssistanceDog');", "sql": "SELECT COUNT(DISTINCT accommodation_type) FROM ServiceAnimalAccommodations WHERE accommodation_type = 'MobilityAssistanceDog';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Location for the jiu-jitsu vs martial arts?", "schema": "CREATE TABLE table_name_36 (location VARCHAR, event VARCHAR)", "sql": "SELECT location FROM table_name_36 WHERE event = 'jiu-jitsu vs martial arts';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What's the Proto-Germanic when the German is /t/?", "schema": "CREATE TABLE table_name_48 (proto_germanic VARCHAR, german VARCHAR)", "sql": "SELECT proto_germanic FROM table_name_48 WHERE german = '/t/';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "How many circular economy initiatives were launched in Osaka in 2020?", "schema": "CREATE TABLE circular_economy_initiatives(location VARCHAR(20), launch_date DATE); INSERT INTO circular_economy_initiatives VALUES('Osaka', '2020-01-01'), ('Osaka', '2020-03-15'), ('Tokyo', '2019-12-31');", "sql": "SELECT COUNT(*) as initiatives FROM circular_economy_initiatives WHERE location = 'Osaka' AND YEAR(launch_date) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1}
{"question": "What is the total number of fish in each farm in the aquaculture facility?", "schema": "CREATE TABLE salmon_farms (id INT, name TEXT, location TEXT, size INT);CREATE TABLE fish_population (id INT, farm_id INT, species TEXT, population INT, biomass FLOAT, date DATE);", "sql": "SELECT farm_id, SUM(population) FROM fish_population GROUP BY farm_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "Show a SQL definition from the timescaledb project (plain, item 12).", "schema": null, "sql": "CREATE TABLE z (b TEXT, PRIMARY KEY(aa, b)) inherits (a);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 57, "num_statements": 1}
{"question": "What is the total number of workers from historically underrepresented communities in the Operations department?", "schema": "CREATE TABLE departments (id INT, name VARCHAR(255), diversity_stats VARCHAR(255)); INSERT INTO departments (id, name, diversity_stats) VALUES (1, 'HR', '{\"total_employees\":50, \"underrepresented\":20}'), (2, 'Operations', '{\"total_employees\":75, \"underrepresented\":15}'), (3, 'Finance', '{\"total_employees\":60, \"underrepresented\":10}');", "sql": "SELECT d.name AS department, JSON_EXTRACT(d.diversity_stats, '$.underrepresented') AS underrepresented_count FROM departments d WHERE d.name = 'Operations';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1}
{"question": "What was the total donation amount for the year 2020 in the 'Great Lakes' region?", "schema": "CREATE TABLE Donations (donation_id INT, region VARCHAR(20), amount DECIMAL(10,2), donation_year INT); INSERT INTO Donations (donation_id, region, amount, donation_year) VALUES (1, 'Great Lakes', 5000.00, 2020), (2, 'Southeast', 3000.00, 2020);", "sql": "SELECT SUM(amount) FROM Donations WHERE region = 'Great Lakes' AND donation_year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "What was the total budget for criminal justice systems before 2018?", "schema": "CREATE TABLE public.criminal_justice (id serial PRIMARY KEY, name text, type text, budget integer, year integer); INSERT INTO public.criminal_justice (name, type, budget, year) VALUES ('Prison System', 'Corrections', 85000000, 2020), ('Police Department', 'Law Enforcement', 150000000, 2018);", "sql": "SELECT name, type, budget, year, (SELECT SUM(budget) FROM public.criminal_justice cj2 WHERE cj2.year < cj.year AND cj2.id <> cj.id) as total_budget_before FROM public.criminal_justice cj WHERE year < 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 205, "num_statements": 1}
{"question": "Which urban farms in Oakland, CA have the highest yield per acre?", "schema": "CREATE TABLE urban_farms (name TEXT, city TEXT, state TEXT, acres NUMERIC, yield NUMERIC); INSERT INTO urban_farms (name, city, state, acres, yield) VALUES ('Groundwork', 'Oakland', 'CA', 2.5, 15000), ('City Slicker Farms', 'Oakland', 'CA', 3.2, 12000), ('Kinderfarms', 'Oakland', 'CA', 1.9, 8000);", "sql": "SELECT name, acres, yield, ROW_NUMBER() OVER (ORDER BY yield/acres DESC) as rank FROM urban_farms WHERE city = 'Oakland' AND state = 'CA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1}
{"question": "How many articles were published per day in January 2020?", "schema": "CREATE TABLE articles (id INT, title VARCHAR(100), content TEXT, publish_date DATE); INSERT INTO articles (id, title, content, publish_date) VALUES (1, 'Article 1', 'Content 1', '2020-01-01'), (2, 'Article 2', 'Content 2', '2020-01-02'), (3, 'Article 3', 'Content 3', '2020-01-03');", "sql": "SELECT DATE_FORMAT(publish_date, '%Y-%m-%d') AS day, COUNT(*) as articles_per_day FROM articles WHERE YEAR(publish_date) = 2020 AND MONTH(publish_date) = 1 GROUP BY day;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the number of season number for jeff truman", "schema": "CREATE TABLE table_15838081_3 (season__number VARCHAR, writer_s_ VARCHAR)", "sql": "SELECT COUNT(season__number) FROM table_15838081_3 WHERE writer_s_ = 'Jeff Truman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which highschool has senior (rs) for the year?", "schema": "CREATE TABLE table_22496344_1 (high_school VARCHAR, year VARCHAR)", "sql": "SELECT high_school FROM table_22496344_1 WHERE year = 'Senior (RS)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "pgTAP test for Functap (assertion 184).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_normal_function( 'tap_accum', ARRAY[etype()], 'whatever' ),\n true,\n 'isnt_normal_function(func, agg, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 175, "num_statements": 1}
{"question": "PostgreSQL Rules: show example 31.", "schema": null, "sql": "INSERT INTO shoelace_log VALUES ( new.sl_name, new.sl_avail, current_user, current_timestamp ) FROM shoelace_data new, shoelace_data old, shoelace_data shoelace_data WHERE new.sl_avail <> old.sl_avail ;", "explanation": "Example from PostgreSQL documentation on Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 202, "num_statements": 1}
{"question": "What is the minimum mental health score of students who identify as Indigenous, grouped by their ethnicity?", "schema": "CREATE TABLE students (student_id INT, ethnicity VARCHAR(255), mental_health_score INT); INSERT INTO students (student_id, ethnicity, mental_health_score) VALUES (1, 'Native American', 80), (2, 'Latino', 70), (3, 'Indigenous Australian', 90);", "sql": "SELECT ethnicity, MIN(mental_health_score) as min_score FROM students WHERE ethnicity LIKE '%Indigenous%' GROUP BY ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the attendance for july 18?", "schema": "CREATE TABLE table_name_84 (attendance VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(attendance) FROM table_name_84 WHERE date = 'july 18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Where has a Rules of thai boxing, and a Round of n/a, and an Opponent of everton crawford?", "schema": "CREATE TABLE table_name_99 (location VARCHAR, opponent VARCHAR, rules VARCHAR, round VARCHAR)", "sql": "SELECT location FROM table_name_99 WHERE rules = 'thai boxing' AND round = 'n/a' AND opponent = 'everton crawford';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1}
{"question": "What is the average fare for bus routes serving low-income neighborhoods?", "schema": "CREATE TABLE bus_routes (route_id INT, neighborhood VARCHAR(255), fare DECIMAL(5,2)); INSERT INTO bus_routes (route_id, neighborhood, fare) VALUES (1, 'Westwood', 1.50), (2, 'Downtown', 2.50), (3, 'Eastside', 0.75);", "sql": "SELECT AVG(fare) FROM bus_routes WHERE neighborhood IN (SELECT neighborhood FROM low_income_neighborhoods);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1}
{"question": "What is the average duration of peacekeeping operations for each country?", "schema": "CREATE TABLE Peacekeeping_Operations (Operation_ID INT, Country_Name VARCHAR(50), Start_Date DATE, End_Date DATE); INSERT INTO Peacekeeping_Operations (Operation_ID, Country_Name, Start_Date, End_Date) VALUES (1, 'Bangladesh', '2005-01-01', '2007-12-31');", "sql": "SELECT Country_Name, AVG(DATEDIFF(End_Date, Start_Date)) as Average_Duration FROM Peacekeeping_Operations GROUP BY Country_Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who is the player when the pick is 22 and the year [A] is after 1979?", "schema": "CREATE TABLE table_name_5 (player_name VARCHAR, pick VARCHAR, year_ VARCHAR, a_ VARCHAR)", "sql": "SELECT player_name FROM table_name_5 WHERE pick = '22' AND year_[a_] > 1979;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1}
{"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 284).", "schema": null, "sql": "select '[2]={1,7}'::int[];", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '[2]={1,7}'::int[]) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many draft copies does the document with id 2 have?", "schema": "CREATE TABLE Draft_Copies (document_id VARCHAR)", "sql": "SELECT COUNT(*) FROM Draft_Copies WHERE document_id = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "Insert records into the 'user_demographics' table", "schema": "CREATE TABLE user_demographics (user_id INT, age INT, gender VARCHAR(10), occupation VARCHAR(255));", "sql": "INSERT INTO user_demographics (user_id, age, gender, occupation) VALUES (1, 35, 'male', 'software engineer');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1}
{"question": "What is the maximum number of sessions attended by a patient in Spain for any therapy?", "schema": "CREATE TABLE therapy_attendance (id INT, patient_id INT, session_name TEXT, num_sessions INT, country TEXT);", "sql": "SELECT MAX(num_sessions) FROM therapy_attendance WHERE country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "What is the total revenue of sales in the last quarter for ethical clothing produced in factories with fair labor practices?", "schema": "CREATE TABLE Factories (factory_id INT, name VARCHAR(100), location VARCHAR(100), num_workers INT, has_fair_labor BOOLEAN); CREATE TABLE Products (product_id INT, name VARCHAR(100), factory_id INT, price DECIMAL(5,2)); CREATE TABLE Sales (sale_id INT, product_id INT, quantity INT, sale_date DATE); INSERT INTO Factories VALUES (1,'Factory A','New York',200,TRUE),(2,'Factory B','Mumbai',350,FALSE),(3,'Factory C','Dhaka',500,TRUE),(4,'Factory D','São Paulo',400,FALSE); INSERT INTO Products VALUES (1,'Eco T-Shirt',1,20.00),(2,'Fair Trade Blouse',2,30.00),(3,'Sustainable Skirt',3,40.00),(4,'Organic Cotton Jeans',3,50.00); INSERT INTO Sales VALUES (1,1,10,DATE '2022-01-01'),(2,2,15,DATE '2022-02-15'),(3,3,20,DATE '2022-03-30'),(4,4,25,DATE '2022-04-15');", "sql": "SELECT SUM(Products.price * Sales.quantity) FROM Factories JOIN Products ON Factories.factory_id = Products.factory_id JOIN Sales ON Products.product_id = Sales.product_id WHERE Factories.has_fair_labor = TRUE AND Sales.sale_date >= DATE '2022-01-01' AND Sales.sale_date < DATE '2022-04-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 291, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is Player, when Pick is less than 161, and when College is \"Auburn University\"?", "schema": "CREATE TABLE table_name_58 (player VARCHAR, pick VARCHAR, college VARCHAR)", "sql": "SELECT player FROM table_name_58 WHERE pick < 161 AND college = 'auburn university';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1}
{"question": "What is the average age of aircraft manufactured by Boeing?", "schema": "CREATE TABLE Manufacturers (Id INT, Name VARCHAR(50)); INSERT INTO Manufacturers (Id, Name) VALUES (1, 'Boeing'), (2, 'Airbus'); CREATE TABLE Aircraft (Id INT, Name VARCHAR(50), Age INT, ManufacturerId INT); INSERT INTO Aircraft (Id, Name, Age, ManufacturerId) VALUES (1, '747', 50, 1), (2, '777', 25, 1), (3, 'A320', 10, 2), (4, 'A350', 5, 2);", "sql": "SELECT AVG(Aircraft.Age) FROM Aircraft JOIN Manufacturers ON Aircraft.ManufacturerId = Manufacturers.Id WHERE Manufacturers.Name = 'Boeing';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1}
{"question": "What is the maximum safety rating in the 'workplace_safety' table for workplaces with a union membership size greater than 100 in the 'labor_rights' table?", "schema": "CREATE TABLE workplace_safety (safety_rating INT, workplace_id INT); CREATE TABLE labor_rights (workplace_id INT, union_membership_size INT);", "sql": "SELECT MAX(workplace_safety.safety_rating) FROM workplace_safety INNER JOIN labor_rights ON workplace_safety.workplace_id = labor_rights.workplace_id WHERE labor_rights.union_membership_size > 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 108).", "schema": null, "sql": "CREATE TABLE testpub_rf_tbl2 (c text, d integer);", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1}
{"question": "Find the number of genetic research participants by project.", "schema": "CREATE TABLE research_participants (id INT, project_id INT, participant_id INT, date DATE);", "sql": "SELECT project_id, COUNT(DISTINCT participant_id) FROM research_participants GROUP BY project_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1}
{"question": "What is the maximum depth of the ocean?", "schema": "CREATE TABLE ocean_depth (location TEXT, depth INTEGER);", "sql": "SELECT MAX(depth) FROM ocean_depth;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1}
{"question": "Find the number of visitors who attended exhibitions in Tokyo or New York.", "schema": "CREATE TABLE Visitors (id INT, city VARCHAR(20)); INSERT INTO Visitors (id, city) VALUES (1, 'Tokyo'), (2, 'Paris'), (3, 'New York'), (4, 'Berlin');", "sql": "SELECT COUNT(*) FROM Visitors WHERE city IN ('Tokyo', 'New York');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "How many water sources were offline for more than a month in the 'WaterSources' table?", "schema": "CREATE TABLE WaterSources (ID INT, SourceID INT, Status VARCHAR(10), LastOnline DATE); INSERT INTO WaterSources (ID, SourceID, Status, LastOnline) VALUES (1, 1, 'Online', '2022-01-01'); INSERT INTO WaterSources (ID, SourceID, Status, LastOnline) VALUES (2, 2, 'Offline', '2022-06-15');", "sql": "SELECT COUNT(*) FROM WaterSources WHERE Status = 'Offline' AND DATEDIFF(day, LastOnline, GETDATE()) > 30;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Attendance has a Result of l 26–16?", "schema": "CREATE TABLE table_name_47 (attendance VARCHAR, result VARCHAR)", "sql": "SELECT attendance FROM table_name_47 WHERE result = 'l 26–16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "What were the total sales of each drug in the Southeast region in Q2 2021?", "schema": "CREATE TABLE drugs (drug_id INT, drug_name VARCHAR(255)); INSERT INTO drugs (drug_id, drug_name) VALUES (1, 'DrugA'), (2, 'DrugB'); CREATE TABLE sales (sale_id INT, drug_id INT, region VARCHAR(255), sales_amount DECIMAL(10, 2), quarter INT, year INT); INSERT INTO sales (sale_id, drug_id, region, sales_amount, quarter, year) VALUES (1, 1, 'Southeast', 15000, 2, 2021), (2, 2, 'Southeast', 20000, 2, 2021);", "sql": "SELECT d.drug_name, SUM(s.sales_amount) as total_sales FROM drugs d JOIN sales s ON d.drug_id = s.drug_id WHERE s.region = 'Southeast' AND s.quarter = 2 AND s.year = 2021 GROUP BY d.drug_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1}
{"question": "Which community has the highest obesity rate in the US?", "schema": "CREATE TABLE Community (Name TEXT, State TEXT, ObesityRate FLOAT); INSERT INTO Community (Name, State, ObesityRate) VALUES ('Community A', 'California', 20.0); INSERT INTO Community (Name, State, ObesityRate) VALUES ('Community B', 'Texas', 25.0);", "sql": "SELECT Name, ObesityRate FROM Community WHERE State = 'US' ORDER BY ObesityRate DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1}
{"question": "What was the total number of volunteers registered in 'New York' and 'Florida'?", "schema": "CREATE TABLE Volunteers (volunteer_id INT, registration_date DATE, state VARCHAR(20)); INSERT INTO Volunteers (volunteer_id, registration_date, state) VALUES (1, '2022-01-01', 'New York'), (2, '2022-01-02', 'Florida');", "sql": "SELECT SUM(state = 'New York') + SUM(state = 'Florida') FROM Volunteers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "What is the total investment in companies with a high ESG rating (greater than 80)?", "schema": "CREATE TABLE investments (investment_id INT, company_id INT, investment_amount FLOAT); INSERT INTO investments (investment_id, company_id, investment_amount) VALUES (1, 1, 150000), (2, 2, 200000), (3, 3, 120000), (4, 5, 250000); CREATE TABLE companies (company_id INT, ESG_rating FLOAT); INSERT INTO companies (company_id, ESG_rating) VALUES (1, 85.2), (2, 78.1), (3, 88.5), (4, 65.3), (5, 82.7);", "sql": "SELECT SUM(investment_amount) FROM investments JOIN companies ON investments.company_id = companies.company_id WHERE companies.ESG_rating > 80;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1}
{"question": "What is the total number of customers who have made at least one transaction in the last week?", "schema": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), last_transaction_date DATE); INSERT INTO customers (customer_id, name, last_transaction_date) VALUES (1, 'John Doe', '2022-02-05'), (2, 'Jane Smith', NULL), (3, 'Bob Johnson', '2022-02-02');", "sql": "SELECT COUNT(DISTINCT customer_id) FROM customers WHERE last_transaction_date >= DATE_SUB(CURDATE(), INTERVAL 1 WEEK);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How long did it take to ride when the laps were 23 and the grid of 13?", "schema": "CREATE TABLE table_name_65 (time VARCHAR, laps VARCHAR, grid VARCHAR)", "sql": "SELECT time FROM table_name_65 WHERE laps = 23 AND grid = 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1}
{"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 292).", "schema": null, "sql": "select t1.ten, sum(x) from\n tenk1 t1 left join lateral (\n select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2\n ) ss on t1.unique1 = ss.fivethous\ngroup by t1.ten\norder by t1.ten;", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select t1.ten, sum(x) from\n tenk1 t1 left join lateral (\n select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2\n ) ss on t1.unique1 = ss.fivethous\ngroup by t1.ten\norder by t1.ten) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 186, "num_statements": 1}
{"question": "How many mental health parity consultations were conducted by community health workers in California?", "schema": "CREATE TABLE community_health_workers (id INT, name TEXT, zip TEXT, consultations INT); INSERT INTO community_health_workers (id, name, zip, consultations) VALUES (1, 'John Doe', '90001', 30), (2, 'Jane Smith', '94117', 45);", "sql": "SELECT SUM(consultations) FROM community_health_workers WHERE zip BETWEEN '90001' AND '96162';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the landesliga mitte sv türk gücü münchen", "schema": "CREATE TABLE table_20181270_3 (landesliga_mitte VARCHAR, bayernliga VARCHAR)", "sql": "SELECT landesliga_mitte FROM table_20181270_3 WHERE bayernliga = 'SV Türk Gücü München';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "What is the maximum financial capability score for individuals in India, grouped by gender?", "schema": "CREATE TABLE individuals (id INT, country VARCHAR(255), gender VARCHAR(255), financial_capability_score INT);", "sql": "SELECT gender, MAX(financial_capability_score) FROM individuals WHERE country = 'India' GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1}
{"question": "What is the count of students who received accommodations for 'Extended Testing Time' in the 'StudentAccommodations' table?", "schema": "CREATE TABLE StudentAccommodations (student_id INT, accommodation_type VARCHAR(255)); INSERT INTO StudentAccommodations (student_id, accommodation_type) VALUES (1, 'Sign Language Interpreter'), (2, 'Assistive Technology'), (3, 'Extended Testing Time'), (4, 'Extended Testing Time');", "sql": "SELECT COUNT(*) FROM StudentAccommodations WHERE accommodation_type = 'Extended Testing Time';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 11).", "schema": null, "sql": "select encrypt('foo', '0123456789012345678901', 'aes');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Where was the game held that was played on 2002-03-07?", "schema": "CREATE TABLE table_name_36 (venue VARCHAR, date VARCHAR)", "sql": "SELECT venue FROM table_name_36 WHERE date = '2002-03-07';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "Identify unique authors who have written for 'The Hindu' and 'NDTV' in May 2021.", "schema": "CREATE TABLE hindu (author_id INT, author_name VARCHAR(50), article_date DATE); INSERT INTO hindu (author_id, author_name, article_date) VALUES (1, 'Rajesh Patel', '2021-05-01'), (2, 'Priya Gupta', '2021-05-02'); CREATE TABLE ndtv (author_id INT, author_name VARCHAR(50), article_date DATE); INSERT INTO ndtv (author_id, author_name, article_date) VALUES (3, 'Meera Kapoor', '2021-05-01'), (4, 'Rajesh Patel', '2021-05-03');", "sql": "SELECT author_name FROM hindu WHERE article_date BETWEEN '2021-05-01' AND '2021-05-31' INTERSECT SELECT author_name FROM ndtv WHERE article_date BETWEEN '2021-05-01' AND '2021-05-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: what is the react when the country is sweden and the lane is higher than 6?", "schema": "CREATE TABLE table_name_65 (react INTEGER, country VARCHAR, lane VARCHAR)", "sql": "SELECT SUM(react) FROM table_name_65 WHERE country = 'sweden' AND lane > 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Which vessels have been inspected in the last month?", "schema": "CREATE TABLE Vessel (vessel_id INT, name VARCHAR(255), type VARCHAR(255), max_speed DECIMAL(5,2)); CREATE TABLE Inspection (inspection_id INT, vessel_id INT, inspection_time TIMESTAMP); INSERT INTO Vessel (vessel_id, name, type, max_speed) VALUES (1, 'Test Vessel 1', 'Cargo', 20.5), (2, 'Test Vessel 2', 'Tanker', 15.2); INSERT INTO Inspection (inspection_id, vessel_id, inspection_time) VALUES (1, 1, '2022-01-01 12:00:00'), (2, 2, '2022-01-15 10:00:00'), (3, 1, '2022-02-01 09:00:00');", "sql": "SELECT v.vessel_id, v.name FROM Vessel v INNER JOIN Inspection i ON v.vessel_id = i.vessel_id WHERE i.inspection_time >= NOW() - INTERVAL '1 month';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1}
{"question": "What is the minimum cargo weight handled by port 'Busan' and 'Incheon'?", "schema": "CREATE TABLE ports (port_id INT, port_name VARCHAR(255)); INSERT INTO ports (port_id, port_name) VALUES (1, 'Busan'), (2, 'Incheon'), (3, 'Daegu'); CREATE TABLE cargo (cargo_id INT, port_id INT, weight FLOAT); INSERT INTO cargo (cargo_id, port_id, weight) VALUES (1, 1, 1000), (2, 1, 1500), (3, 2, 800), (4, 3, 1200);", "sql": "SELECT MIN(weight) FROM cargo WHERE port_name IN ('Busan', 'Incheon');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "List all unique medical conditions of astronauts from Brazil.", "schema": "CREATE TABLE AstronautMedical (id INT, astronaut_id INT, nationality VARCHAR(50), medical_condition VARCHAR(50)); INSERT INTO AstronautMedical (id, astronaut_id, nationality, medical_condition) VALUES (1, 121, 'Brazil', 'Hypercalcemia'); INSERT INTO AstronautMedical (id, astronaut_id, nationality, medical_condition) VALUES (2, 122, 'Brazil', 'Urinary Tract Infection'); INSERT INTO AstronautMedical (id, astronaut_id, nationality, medical_condition) VALUES (3, 123, 'Brazil', 'Nausea');", "sql": "SELECT DISTINCT medical_condition FROM AstronautMedical WHERE nationality = 'Brazil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "What is the total investment in economic diversification projects in Indonesia, Malaysia, and the Philippines, including only manufacturing and tourism projects?", "schema": "CREATE TABLE eco_diversification (id INT, name TEXT, location TEXT, investment FLOAT, project_type TEXT); INSERT INTO eco_diversification (id, name, location, investment, project_type) VALUES (1, 'Renewable Energy', 'Indonesia', 700000.00, 'Energy'), (2, 'Tourism Infrastructure', 'Malaysia', 600000.00, 'Tourism'), (3, 'Manufacturing Zone', 'Philippines', 800000.00, 'Manufacturing'), (4, 'Mining Concession', 'Indonesia', 900000.00, 'Mining');", "sql": "SELECT SUM(investment) FROM eco_diversification WHERE location IN ('Indonesia', 'Malaysia', 'Philippines') AND project_type IN ('Manufacturing', 'Tourism');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1}
{"question": "Show an example of PostgreSQL SELECT (example 10).", "schema": null, "sql": "SELECT * FROM distributors ORDER BY name; SELECT * FROM distributors ORDER BY 2; did | name -----+------------------ 109 | 20th Century Fox 110 | Bavaria Atelier 101 | British Lion 107 | Columbia 102 | Jean Luc Godard 113 | Luso films 104 | Mosfilm 103 | Paramount 106 | Toho 105 | United Artists 111 | Walt Disney 112 | Warner Bros. 108 | Westward;", "explanation": "PostgreSQL SELECT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 349, "num_statements": 3}
{"question": "Generate PostgreSQL SQL for: In which colleges is the NFL Team New York Giants and with the position defensive back?", "schema": "CREATE TABLE table_2508633_11 (college VARCHAR, nfl_team VARCHAR, position VARCHAR)", "sql": "SELECT college FROM table_2508633_11 WHERE nfl_team = 'New York Giants' AND position = 'Defensive back';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 700).", "schema": null, "sql": "select jsonb_path_query_tz(\n\t'[\"2017-03-10\", \"2017-03-11\", \"2017-03-09\", \"2017-03-10 00:00:00\", \"2017-03-10 12:34:56\", \"2017-03-10 01:02:03+04\", \"2017-03-10 03:00:00+03\"]',\n\t'$[*].date() ? (@ == \"2017-03-10\".date())');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query_tz(\n\t'[\"2017-03-10\", \"2017-03-11\", \"2017-03-09\", \"2017-03-10 00:00:00\", \"2017-03-10 12:34:56\", \"2017-03-10 01:02:03+04\", \"2017-03-10 03:00:00+03\"]',\n\t'$[*].date() ? (@ == \"2017-03-10\".date())')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the production number directed by Robert McKimson in series mm titled People Are Bunny?", "schema": "CREATE TABLE table_name_18 (production_number INTEGER, title VARCHAR, director VARCHAR, series VARCHAR)", "sql": "SELECT SUM(production_number) FROM table_name_18 WHERE director = 'robert mckimson' AND series = 'mm' AND title = 'people are bunny';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1}
{"question": "What is the total number of streams by country?", "schema": "CREATE TABLE Countries (CountryID int, CountryName varchar(255)); INSERT INTO Countries (CountryID, CountryName) VALUES (1, 'USA'), (2, 'UK'), (3, 'France'); CREATE TABLE StreamsByCountry (StreamID int, CountryID int, StreamCount int); INSERT INTO StreamsByCountry (StreamID, CountryID, StreamCount) VALUES (1, 1, 1000), (2, 2, 2000), (3, 1, 1500);", "sql": "SELECT Countries.CountryName, SUM(StreamsByCountry.StreamCount) as TotalStreams FROM Countries INNER JOIN StreamsByCountry ON Countries.CountryID = StreamsByCountry.CountryID GROUP BY Countries.CountryName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1}
{"question": "Update the ocean_name for species_id 1 to 'Indian Ocean'.", "schema": "CREATE TABLE marine_species (species_id INT, species_name VARCHAR(50), ocean_name VARCHAR(50));", "sql": "UPDATE marine_species SET ocean_name = 'Indian Ocean' WHERE species_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the average Position with less than 57 against and the team is Juventus?", "schema": "CREATE TABLE table_name_19 (position INTEGER, against VARCHAR, team VARCHAR)", "sql": "SELECT AVG(position) FROM table_name_19 WHERE against < 57 AND team = 'juventus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1}
{"question": "What is the total number of citizen feedback records for public services in the city of Los Angeles?", "schema": "CREATE TABLE citizen_feedback (city VARCHAR(20), service VARCHAR(20)); INSERT INTO citizen_feedback (city, service) VALUES ('Los Angeles', 'public transportation'); INSERT INTO citizen_feedback (city, service) VALUES ('Los Angeles', 'public transportation'); INSERT INTO citizen_feedback (city, service) VALUES ('Los Angeles', 'public parks'); INSERT INTO citizen_feedback (city, service) VALUES ('San Diego', 'public libraries');", "sql": "SELECT COUNT(*) FROM citizen_feedback WHERE city = 'Los Angeles';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "What is the total number of crimes committed in each city in the last 3 months?", "schema": "CREATE TABLE cities (id INT, name TEXT);CREATE TABLE crimes (id INT, city_id INT, date DATE);", "sql": "SELECT c.name, COUNT(cr.id) FROM cities c JOIN crimes cr ON c.id = cr.city_id WHERE cr.date >= DATEADD(month, -3, GETDATE()) GROUP BY c.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: If the national trophy/rookie is Simone Iaquinta, what is the season total number?", "schema": "CREATE TABLE table_25563779_4 (season VARCHAR, national_trophy_rookie VARCHAR)", "sql": "SELECT COUNT(season) FROM table_25563779_4 WHERE national_trophy_rookie = 'Simone Iaquinta';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who was the candidate when the result - % was 0.4%?", "schema": "CREATE TABLE table_25818630_2 (candidate VARCHAR, result____percentage VARCHAR)", "sql": "SELECT candidate FROM table_25818630_2 WHERE result____percentage = '0.4%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which name has a Kanji of 朧?", "schema": "CREATE TABLE table_name_77 (name VARCHAR, kanji VARCHAR)", "sql": "SELECT name FROM table_name_77 WHERE kanji = '朧';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What's the latest episode in a season where the U.S. viewers totaled 14.37 million?", "schema": "CREATE TABLE table_10842344_1 (no_in_season INTEGER, us_viewers__millions_ VARCHAR)", "sql": "SELECT MAX(no_in_season) FROM table_10842344_1 WHERE us_viewers__millions_ = '14.37';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "What is the total number of professional development courses completed by teachers in each school, grouped by course type?", "schema": "CREATE TABLE teacher_pd (teacher_id INT, school_id INT, course_id INT, course_type VARCHAR(255)); CREATE TABLE courses (course_id INT, course_name VARCHAR(255), course_type VARCHAR(255)); CREATE TABLE schools (school_id INT, school_name VARCHAR(255));", "sql": "SELECT s.school_name, c.course_type, COUNT(DISTINCT t.teacher_id, t.course_id) as num_courses FROM teacher_pd t INNER JOIN schools s ON t.school_id = s.school_id INNER JOIN courses c ON t.course_id = c.course_id GROUP BY s.school_name, c.course_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1}
{"question": "Calculate the average fuel consumption per hour for the vessel 'Blue Whale' in the 'Tankers' fleet in the past week.", "schema": "CREATE TABLE Vessels (id INT, name VARCHAR(255)); INSERT INTO Vessels (id, name) VALUES (1, 'Blue Whale'); CREATE TABLE FuelConsumption (vessel_id INT, fuel_consumption INT, timestamp TIMESTAMP); INSERT INTO FuelConsumption (vessel_id, fuel_consumption, timestamp) VALUES (1, 500, '2022-07-01 10:00:00'), (1, 800, '2022-07-01 22:00:00');", "sql": "SELECT AVG(fuel_consumption / DATEDIFF(HOUR, LAG(timestamp) OVER (PARTITION BY vessel_id ORDER BY timestamp), timestamp)) as avg_fuel_consumption_per_hour FROM FuelConsumption WHERE vessel_id = 1 AND timestamp >= DATEADD(week, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 242, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Date has a Catalog of 887 195-2, and a Format of cd maxi?", "schema": "CREATE TABLE table_name_63 (date VARCHAR, catalog VARCHAR, format VARCHAR)", "sql": "SELECT date FROM table_name_63 WHERE catalog = '887 195-2' AND format = 'cd maxi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When 55 is the tries for what is the lost?", "schema": "CREATE TABLE table_17941032_1 (lost VARCHAR, tries_for VARCHAR)", "sql": "SELECT lost FROM table_17941032_1 WHERE tries_for = '55';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What does American have if Australia has əʉ?", "schema": "CREATE TABLE table_name_65 (american VARCHAR, australian VARCHAR)", "sql": "SELECT american FROM table_name_65 WHERE australian = 'əʉ';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'copy' (example 43).", "schema": null, "sql": "create table parted_copytest_a1 (c text, b int, a int);", "explanation": "DDL from PostgreSQL core regression test for Copy.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the finish for England?", "schema": "CREATE TABLE table_name_76 (finish VARCHAR, country VARCHAR)", "sql": "SELECT finish FROM table_name_76 WHERE country = 'england';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "List all public transportation routes with more than 50% hybrid bus usage in London.", "schema": "CREATE TABLE public_transportation (route_id INT, route_name TEXT, vehicle_type TEXT, is_hybrid BOOLEAN, passengers INT);", "sql": "SELECT route_name FROM public_transportation WHERE vehicle_type = 'Bus' AND is_hybrid = TRUE GROUP BY route_name HAVING COUNT(*) FILTER (WHERE is_hybrid = TRUE) / COUNT(*) > 0.5 AND route_name LIKE 'London%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 208, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Where is Adam Gilchrist from?", "schema": "CREATE TABLE table_11303072_9 (nationality VARCHAR, player VARCHAR)", "sql": "SELECT nationality FROM table_11303072_9 WHERE player = 'Adam Gilchrist';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "What are the total earnings of each eSports team in the last year?", "schema": "CREATE TABLE eSportsTeams (TeamID INT, TeamName VARCHAR(100), Earnings FLOAT, LastUpdated DATE); INSERT INTO eSportsTeams (TeamID, TeamName, Earnings, LastUpdated) VALUES (1, 'TeamA', 1000000, '2020-01-01'), (2, 'TeamB', 1200000, '2020-02-01'), (3, 'TeamC', 1100000, '2020-03-01');", "sql": "SELECT TeamName, SUM(Earnings) as TotalEarnings FROM eSportsTeams WHERE LastUpdated >= DATEADD(year, -1, GETDATE()) GROUP BY TeamName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1}
{"question": "PL/pgSQL test: Plpython Spi (example 3).", "schema": null, "sql": "CREATE FUNCTION nested_call_three(a text) RETURNS text\n\tAS\n'return a'\n\tLANGUAGE plpython3u ;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Spi.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1}
{"question": "What is the total number of defense diplomacy events held in 'canada' between 2019 and 2021?", "schema": "CREATE TABLE defense_diplomacy (country VARCHAR(50), year INT, events INT); INSERT INTO defense_diplomacy (country, year, events) VALUES ('Canada', 2019, 15), ('Canada', 2020, 12), ('Canada', 2021, 18);", "sql": "SELECT country, SUM(events) as total_events FROM defense_diplomacy WHERE country = 'Canada' AND year BETWEEN 2019 AND 2021 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Akira when Prince Devitt is Devitt (7:20)?", "schema": "CREATE TABLE table_name_99 (akira VARCHAR, prince_devitt VARCHAR)", "sql": "SELECT akira FROM table_name_99 WHERE prince_devitt = 'devitt (7:20)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "List the faculty members who have not published any papers, in alphabetical order.", "schema": "CREATE TABLE faculties (faculty_id INT, name VARCHAR(255), dept_id INT, num_publications INT);", "sql": "SELECT name FROM faculties WHERE num_publications = 0 ORDER BY name ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "What is the average revenue per concert by country?", "schema": "CREATE TABLE concerts (id INT, artist_id INT, city VARCHAR(50), country VARCHAR(50), revenue FLOAT); INSERT INTO concerts (id, artist_id, city, country, revenue) VALUES (1, 1, 'Los Angeles', 'USA', 500000), (2, 1, 'New York', 'USA', 700000), (3, 2, 'Seoul', 'South Korea', 800000), (4, 2, 'Tokyo', 'Japan', 900000), (5, 3, 'Paris', 'France', 1000000), (6, 4, 'Osaka', 'Japan', 850000), (7, 1, 'London', 'UK', 600000);", "sql": "SELECT country, AVG(revenue) as avg_revenue FROM concerts GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When the Vancouver Canucks were visiting, what was the record when the score was 4-2?", "schema": "CREATE TABLE table_name_6 (record VARCHAR, score VARCHAR, visitor VARCHAR)", "sql": "SELECT record FROM table_name_6 WHERE score = '4-2' AND visitor = 'vancouver canucks';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which English has Dutch of tong?", "schema": "CREATE TABLE table_name_26 (english VARCHAR, dutch VARCHAR)", "sql": "SELECT english FROM table_name_26 WHERE dutch = 'tong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Add a new record to the historical_sites table", "schema": "CREATE TABLE historical_sites (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), type VARCHAR(255));", "sql": "INSERT INTO historical_sites (id, name, country, type) VALUES (1, 'Machu Picchu', 'Peru', 'cultural_heritage');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the result when team 1 is ICL Pakistan?", "schema": "CREATE TABLE table_17103566_1 (result VARCHAR, team_1 VARCHAR)", "sql": "SELECT result FROM table_17103566_1 WHERE team_1 = 'ICL Pakistan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who won the mens singles in 2009?", "schema": "CREATE TABLE table_12204717_1 (mens_singles VARCHAR, year VARCHAR)", "sql": "SELECT mens_singles FROM table_12204717_1 WHERE year = 2009;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Insert a new record into the \"resources\" table for a new gold mine in \"Peru\" with ID 901 and reserves of 5000 tons", "schema": "CREATE TABLE resources (id INT, mine_type VARCHAR(50), country VARCHAR(50), reserve_tons INT);", "sql": "INSERT INTO resources (id, mine_type, country, reserve_tons) VALUES (901, 'gold', 'Peru', 5000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1}
{"question": "What's the total transaction volume for digital assets in the last month?", "schema": "CREATE TABLE digital_assets (id INT, name VARCHAR(255), transaction_volume DECIMAL(10, 2)); INSERT INTO digital_assets (id, name, transaction_volume) VALUES (1, 'Asset 1', 1000.50), (2, 'Asset 2', 1500.25), (3, 'Asset 3', 2000.00); CREATE TABLE transactions (id INT, digital_asset_id INT, transaction_date DATE); INSERT INTO transactions (id, digital_asset_id, transaction_date) VALUES (1, 1, '2022-01-01'), (2, 2, '2022-01-05'), (3, 3, '2022-01-10');", "sql": "SELECT SUM(transaction_volume) FROM digital_assets JOIN transactions ON digital_assets.id = transactions.digital_asset_id WHERE transaction_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1}
{"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 13).", "schema": null, "sql": "SELECT to_regclass('pg_class');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_regclass('pg_class')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 31, "num_statements": 1}
{"question": "What is the average age of patients who received therapy from counselors named \"James\" or \"Michelle\"?", "schema": "CREATE TABLE counselors (counselor_id INT, name TEXT, age INT); INSERT INTO counselors (counselor_id, name, age) VALUES (1, 'James', 35), (2, 'Michelle', 40); CREATE TABLE patients (patient_id INT, counselor_id INT, age INT); INSERT INTO patients (patient_id, counselor_id, age) VALUES (1, 1, 25), (2, 1, 30), (3, 2, 45), (4, 2, 50);", "sql": "SELECT AVG(patients.age) FROM patients JOIN counselors ON patients.counselor_id = counselors.counselor_id WHERE counselors.name IN ('James', 'Michelle');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1}
{"question": "What is the average distance of space debris generated by NASA from the Earth's center?", "schema": "CREATE TABLE space_debris (id INT, name VARCHAR(50), type VARCHAR(50), source VARCHAR(50), location POINT);", "sql": "SELECT AVG(DISTANCE(location, POINT(0, 0))) as average_distance FROM space_debris WHERE source = 'NASA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 136).", "schema": null, "sql": "select '$ ? (@.a < +.1e-1)'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$ ? (@.a < +.1e-1)'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1}
{"question": "List the virtual tours that are available in Spanish and have a cultural heritage focus, along with their average user rating.", "schema": "CREATE TABLE virtual_tour (id INT PRIMARY KEY, name VARCHAR(255), language VARCHAR(255), focus VARCHAR(255), user_rating DECIMAL(3,2)); INSERT INTO virtual_tour (id, name, language, focus, user_rating) VALUES (1, 'Tour of the Alhambra', 'Spanish', 'Cultural Heritage', 4.5);", "sql": "SELECT vt.name, vt.language, vt.focus, AVG(vt.user_rating) as avg_rating FROM virtual_tour vt WHERE vt.language = 'Spanish' AND vt.focus = 'Cultural Heritage' GROUP BY vt.name, vt.language, vt.focus;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1}
{"question": "Who are the volunteers that signed up in January and June of 2022?", "schema": "CREATE TABLE volunteers (volunteer_id INT, signup_date DATE); INSERT INTO volunteers (volunteer_id, signup_date) VALUES (1, '2022-01-05'), (2, '2022-03-30'), (3, '2022-04-15'), (4, '2022-06-10');", "sql": "SELECT volunteer_id, signup_date FROM volunteers WHERE (MONTH(signup_date) = 1 AND YEAR(signup_date) = 2022) OR (MONTH(signup_date) = 6 AND YEAR(signup_date) = 2022);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1}
{"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 351).", "schema": null, "sql": "SELECT to_timestamp('2016-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok\nSELECT to_timestamp('2015-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_timestamp('2016-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok\nSELECT to_timestamp('2015-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 2}
{"question": "Generate PostgreSQL SQL for: What is the Date of Week 2?", "schema": "CREATE TABLE table_name_47 (date VARCHAR, week VARCHAR)", "sql": "SELECT date FROM table_name_47 WHERE week = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: who is the constructor when the tyre is d, the engine is talbot 23cv 4.5 l6, the chassis is talbot-lago t26c and the entrant is ecurie belge?", "schema": "CREATE TABLE table_name_75 (constructor VARCHAR, entrant VARCHAR, chassis VARCHAR, tyre VARCHAR, engine VARCHAR)", "sql": "SELECT constructor FROM table_name_75 WHERE tyre = 'd' AND engine = 'talbot 23cv 4.5 l6' AND chassis = 'talbot-lago t26c' AND entrant = 'ecurie belge';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1}
{"question": "How many 'Eco-Friendly' garments were sold in 'Australia' retail stores in Q1 of 2022?", "schema": "CREATE TABLE SalesStore (id INT PRIMARY KEY, store_name VARCHAR(50), location VARCHAR(50), garment_type VARCHAR(50), is_eco_friendly BOOLEAN, quantity INT, sale_date DATE); INSERT INTO SalesStore (id, store_name, location, garment_type, is_eco_friendly, quantity, sale_date) VALUES (1, 'Store D', 'Australia', 'Eco-Friendly T-Shirt', true, 30, '2022-01-15');", "sql": "SELECT SUM(quantity) as total_quantity FROM SalesStore WHERE location = 'Australia' AND is_eco_friendly = true AND sale_date BETWEEN '2022-01-01' AND '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1}
{"question": "What is the number of cases with a female judge and male defendant?", "schema": "CREATE TABLE cases (id INT, judge_gender VARCHAR(6), defendant_gender VARCHAR(6)); INSERT INTO cases (id, judge_gender, defendant_gender) VALUES (1, 'Female', 'Male'), (2, 'Male', 'Female'), (3, 'Female', 'Male');", "sql": "SELECT COUNT(*) FROM cases WHERE judge_gender = 'Female' AND defendant_gender = 'Male';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the highest swimsuit a contestant from Kansas with an average larger than 8.48, an interview higher than 8.58, and an evening gown higher than 8.82 has?", "schema": "CREATE TABLE table_name_69 (swimsuit INTEGER, state VARCHAR, evening_gown VARCHAR, average VARCHAR, interview VARCHAR)", "sql": "SELECT MAX(swimsuit) FROM table_name_69 WHERE average > 8.48 AND interview > 8.58 AND evening_gown > 8.82 AND state = 'kansas';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Find the name of all the cities and states.", "schema": "CREATE TABLE addresses (town_city VARCHAR, state_province_county VARCHAR)", "sql": "SELECT town_city FROM addresses UNION SELECT state_province_county FROM addresses;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the enrollment at delaware valley college?", "schema": "CREATE TABLE table_261906_2 (joined_mac INTEGER, institution VARCHAR)", "sql": "SELECT MAX(joined_mac) FROM table_261906_2 WHERE institution = 'Delaware Valley College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the area of Tuscany?", "schema": "CREATE TABLE table_14532_1 (area__km²_ INTEGER, region VARCHAR)", "sql": "SELECT MAX(area__km²_) FROM table_14532_1 WHERE region = 'Tuscany';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "What was the waste generation in Bangkok in 2019?", "schema": "CREATE TABLE waste_generation_bangkok (year INT, total_waste INT); INSERT INTO waste_generation_bangkok (year, total_waste) VALUES (2018, 150000), (2019, 170000), (2020, 185000);", "sql": "SELECT total_waste FROM waste_generation_bangkok WHERE year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "Which destinations in Canada have the highest increase in visitors from 2019 to 2022?", "schema": "CREATE TABLE canada_tourism (destination VARCHAR(50), year INT, visitors INT); INSERT INTO canada_tourism (destination, year, visitors) VALUES ('Banff', 2019, 500000), ('Banff', 2022, 700000), ('Whistler', 2019, 300000), ('Whistler', 2022, 500000);", "sql": "SELECT destination, MAX(visitors) - MIN(visitors) AS increase FROM canada_tourism WHERE year IN (2019, 2022) GROUP BY destination ORDER BY increase DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which position from the Cherkassy Monkeys' club was born after 1978?", "schema": "CREATE TABLE table_name_10 (position VARCHAR, year_born VARCHAR, current_club VARCHAR)", "sql": "SELECT position FROM table_name_10 WHERE year_born > 1978 AND current_club = 'cherkassy monkeys';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1}
{"question": "Delete all records from the 'readers' table where 'country' is 'United States'", "schema": "CREATE TABLE readers (id INT, name VARCHAR(50), country VARCHAR(50)); INSERT INTO readers (id, name, country) VALUES (1, 'Alice Johnson', 'United States'); INSERT INTO readers (id, name, country) VALUES (2, 'Bob Williams', 'Canada');", "sql": "DELETE FROM readers WHERE country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1}
{"question": "What is the average number of fouls committed by players from Argentina in the 'World Cup'?", "schema": "CREATE TABLE players (player_id INT, name TEXT, country TEXT); INSERT INTO players (player_id, name, country) VALUES (1, 'Messi', 'Argentina'), (2, 'Di Maria', 'Argentina'), (3, 'Kane', 'England'); CREATE TABLE fouls (foul_id INT, player_id INT, fouls INT); INSERT INTO fouls (foul_id, player_id, fouls) VALUES (1, 1, 2), (2, 1, 3), (3, 2, 1), (4, 3, 5); CREATE TABLE games (game_id INT, player_id INT, tournament TEXT); INSERT INTO games (game_id, player_id, tournament) VALUES (1, 1, 'World Cup'), (2, 1, 'World Cup'), (3, 2, 'World Cup'), (4, 3, 'World Cup');", "sql": "SELECT AVG(fouls) FROM fouls JOIN games ON fouls.player_id = games.player_id JOIN players ON fouls.player_id = players.player_id WHERE players.country = 'Argentina' AND games.tournament = 'World Cup';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1}
{"question": "pgTAP test for Index (assertion 75).", "schema": null, "sql": "SELECT * FROM check_test(\n index_is_primary( 'idx_baz' ),\n false,\n 'index_is_primary() fail index only',\n 'Index idx_baz should be on a primary key',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1}
{"question": "How many local businesses in Spain have benefited from sustainable tourism initiatives?", "schema": "CREATE TABLE local_businesses (business_id INT, business_name TEXT, country TEXT, sustainable_initiative BOOLEAN); INSERT INTO local_businesses (business_id, business_name, country, sustainable_initiative) VALUES (1, 'La Ribera Market', 'Spain', TRUE); INSERT INTO local_businesses (business_id, business_name, country, sustainable_initiative) VALUES (2, 'Gothic Quarter Shops', 'Spain', TRUE); INSERT INTO local_businesses (business_id, business_name, country, sustainable_initiative) VALUES (3, 'Barceloneta Fish Market', 'Spain', FALSE);", "sql": "SELECT COUNT(*) FROM local_businesses WHERE country = 'Spain' AND sustainable_initiative = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What are the goals for 8/4/04?", "schema": "CREATE TABLE table_name_70 (goals VARCHAR, date VARCHAR)", "sql": "SELECT goals FROM table_name_70 WHERE date = '8/4/04';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1}
{"question": "What is the total amount of donations made by donors from Palestine in the year 2019?", "schema": "CREATE TABLE donations (id INT, donor_id INT, donor_country TEXT, donation_date DATE, donation_amount DECIMAL); INSERT INTO donations (id, donor_id, donor_country, donation_date, donation_amount) VALUES (1, 1, 'Palestine', '2019-01-01', 50.00), (2, 2, 'Palestine', '2019-06-01', 100.00), (3, 3, 'Palestine', '2019-12-31', 25.00);", "sql": "SELECT SUM(donation_amount) FROM donations WHERE donor_country = 'Palestine' AND YEAR(donation_date) = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 296).", "schema": null, "sql": "CREATE TABLE s1 (a int, b text);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1}
{"question": "What is the average product price for each category in the 'sustainable_products' table?", "schema": "CREATE TABLE sustainable_products (product_id INT, category VARCHAR(255), price DECIMAL(10,2));", "sql": "SELECT category, AVG(price) FROM sustainable_products GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is every Gregorian month when the season in Tamil is இளவேனில்?", "schema": "CREATE TABLE table_1740431_3 (gregorian_months VARCHAR, season_in_tamil VARCHAR)", "sql": "SELECT gregorian_months FROM table_1740431_3 WHERE season_in_tamil = 'இளவேனில்';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What's the population with a code more than 90902 and an area less than 1,335.47?", "schema": "CREATE TABLE table_name_19 (population INTEGER, code VARCHAR, area__km_2__ VARCHAR)", "sql": "SELECT MIN(population) FROM table_name_19 WHERE code > 90902 AND area__km_2__ < 1335.47;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "Display the total quantity of sustainable materials used by ethical fashion brands in the last quarter.", "schema": "CREATE TABLE Dates (date_id INT, date DATE); INSERT INTO Dates (date_id, date) VALUES (1, '2022-01-01'), (2, '2022-04-01'), (3, '2022-07-01'), (4, '2022-10-01'); CREATE TABLE SustainableBrands (brand_id INT, material_id INT, quantity INT, date_id INT); INSERT INTO SustainableBrands (brand_id, material_id, quantity, date_id) VALUES (1, 1, 500, 4), (1, 2, 300, 4), (2, 3, 700, 4), (3, 1, 400, 4);", "sql": "SELECT SUM(quantity) FROM SustainableBrands INNER JOIN Dates ON SustainableBrands.date_id = Dates.date_id WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) AND EXISTS (SELECT * FROM Brands WHERE Brands.brand_id = SustainableBrands.brand_id AND location = 'Europe');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 270, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who is the champion of the 1994 season?", "schema": "CREATE TABLE table_2527617_1 (champion VARCHAR, season VARCHAR)", "sql": "SELECT champion FROM table_2527617_1 WHERE season = 1994;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Country has a Score of 70-68-70-68=276?", "schema": "CREATE TABLE table_name_43 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_43 WHERE score = '70-68-70-68=276';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the last 10 meetings that have 11/26/1988 as the last meeting?", "schema": "CREATE TABLE table_name_32 (last_10_meetings VARCHAR, last_meeting VARCHAR)", "sql": "SELECT last_10_meetings FROM table_name_32 WHERE last_meeting = '11/26/1988';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1}
{"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 252).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _ikeys( NAME, NAME, NAME);", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: List ids for all student who are on scholarship.", "schema": "CREATE TABLE Sportsinfo (StuID VARCHAR, onscholarship VARCHAR)", "sql": "SELECT StuID FROM Sportsinfo WHERE onscholarship = 'Y';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "List the total number of pollution control initiatives implemented in the Atlantic and Arctic regions.", "schema": "CREATE TABLE PollutionControl (id INT, initiative VARCHAR(50), region VARCHAR(20)); INSERT INTO PollutionControl (id, initiative, region) VALUES (1, 'Ocean Cleanup', 'Arctic'), (2, 'Plastic Reduction', 'Atlantic'), (3, 'Carbon Capture', 'Global');", "sql": "SELECT region, COUNT(*) as total_initiatives FROM PollutionControl WHERE region IN ('Atlantic', 'Arctic') GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the score of the 3-0 Win result?", "schema": "CREATE TABLE table_name_91 (score VARCHAR, result VARCHAR)", "sql": "SELECT score FROM table_name_91 WHERE result = '3-0 win';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: The station located in Albuquerque has been owned since what year?", "schema": "CREATE TABLE table_11147852_1 (owned_since VARCHAR, city_of_license_market VARCHAR)", "sql": "SELECT owned_since FROM table_11147852_1 WHERE city_of_license_market = 'Albuquerque';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1}
{"question": "What is the average salary by department for employees who have been trained?", "schema": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Salary FLOAT, Trained BOOLEAN); INSERT INTO Employees (EmployeeID, Department, Salary, Trained) VALUES (1, 'IT', 75000.0, 1), (2, 'HR', 65000.0, 0), (3, 'IT', 80000.0, 1);", "sql": "SELECT Department, AVG(Salary) FROM Employees WHERE Trained = 1 GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1}
{"question": "List all tables and views in the 'telecom' schema", "schema": "CREATE SCHEMA telecom; CREATE TABLE mobile_subscribers (id INT, name TEXT, data_plan TEXT); CREATE VIEW broadband_subscribers AS SELECT * FROM subscribers WHERE type = 'broadband'; CREATE TABLE network_investments (year INT, amount FLOAT); CREATE TABLE compliance_reports (quarter INT, filed BOOLEAN);", "sql": "SELECT * FROM information_schema.tables WHERE table_schema = 'telecom';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 71, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the score when they played the Boston Patriots?", "schema": "CREATE TABLE table_name_91 (result VARCHAR, opponent VARCHAR)", "sql": "SELECT result FROM table_name_91 WHERE opponent = 'boston patriots';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "List the rural infrastructure projects and their budgets for 'rural_area_1' from the 'rural_infrastructure' and 'community_development' tables", "schema": "CREATE TABLE rural_infrastructure (project_id INT, project_type VARCHAR(50), budget INT, area_id INT); CREATE TABLE community_development (area_id INT, area_name VARCHAR(50));", "sql": "SELECT r.project_type, r.budget FROM rural_infrastructure r INNER JOIN community_development c ON r.area_id = c.area_id WHERE c.area_name = 'rural_area_1';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1}
{"question": "Show the total installed capacity of renewable energy projects for each location", "schema": "CREATE TABLE renewable_energy ( id INT PRIMARY KEY, location VARCHAR(255), project_name VARCHAR(255), installed_capacity INT ); INSERT INTO renewable_energy (id, location, project_name, installed_capacity) VALUES (1, 'Germany', 'Solarpark Finow Tower', 45000); INSERT INTO renewable_energy (id, location, project_name, installed_capacity) VALUES (2, 'France', 'La Plaine Wind Farm', 60000);", "sql": "SELECT location, SUM(installed_capacity) FROM renewable_energy GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1}
{"question": "List all countries with their respective number of podcasts and the total duration of those podcasts.", "schema": "CREATE TABLE podcasts (id INT, name VARCHAR(255), country VARCHAR(255), duration INT); INSERT INTO podcasts (id, name, country, duration) VALUES (1, 'Podcast1', 'USA', 100), (2, 'Podcast2', 'UK', 200);", "sql": "SELECT country, COUNT(*) as num_podcasts, SUM(duration) as total_duration FROM podcasts GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When was the WC 1974 Qualifying game?", "schema": "CREATE TABLE table_name_94 (date VARCHAR, type_of_game VARCHAR)", "sql": "SELECT date FROM table_name_94 WHERE type_of_game = 'wc 1974 qualifying';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "PL/pgSQL test: Plpython Subtransaction (example 31).", "schema": null, "sql": "SELECT subtransaction_exit_without_enter();", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Subtransaction.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1}
{"question": "List all volunteers who worked on 'Food Security' program in 'Asia'", "schema": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName TEXT, Country TEXT); INSERT INTO Volunteers (VolunteerID, VolunteerName, Country) VALUES (1, 'Anna Lee', 'Korea'); INSERT INTO Volunteers (VolunteerID, VolunteerName, Country) VALUES (2, 'Ravi Patel', 'India'); CREATE TABLE VolunteerHours (VolunteerID INT, Program TEXT, Hours DECIMAL); INSERT INTO VolunteerHours (VolunteerID, Program, Hours) VALUES (1, 'Food Security', 20); INSERT INTO VolunteerHours (VolunteerID, Program, Hours) VALUES (2, 'Food Security', 30);", "sql": "SELECT Volunteers.VolunteerName FROM Volunteers INNER JOIN VolunteerHours ON Volunteers.VolunteerID = VolunteerHours.VolunteerID WHERE VolunteerHours.Program = 'Food Security' AND Volunteers.Country = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: what are all the positions of players who's hometown is concord, california", "schema": "CREATE TABLE table_11677691_12 (position VARCHAR, hometown VARCHAR)", "sql": "SELECT position FROM table_11677691_12 WHERE hometown = 'Concord, California';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'cube' (example 160).", "schema": null, "sql": "SELECT cube_ur_coord('(1,2),(1,2)'::cube, 2);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 45, "num_statements": 1}
{"question": "Identify users who have posted more than 50 posts in the 'social_media' table.", "schema": "CREATE TABLE social_media (user_id INT, posts_count INT);", "sql": "SELECT user_id FROM social_media WHERE posts_count > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many points did the Falcons score when the record was 4-4?", "schema": "CREATE TABLE table_16710971_2 (falcons_points VARCHAR, record VARCHAR)", "sql": "SELECT falcons_points FROM table_16710971_2 WHERE record = '4-4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "How many countries in Europe have been promoting sustainable tourism since 2019?", "schema": "CREATE TABLE Sustainable_Practices ( id INT PRIMARY KEY, country_id INT, certification_date DATE, FOREIGN KEY (country_id) REFERENCES Countries(id) ); INSERT INTO Sustainable_Practices (id, country_id, certification_date) VALUES (1, 7, '2019-07-01'); INSERT INTO Sustainable_Practices (id, country_id, certification_date) VALUES (2, 8, '2020-03-01');", "sql": "SELECT COUNT(DISTINCT c.id) as country_count FROM Countries c INNER JOIN Sustainable_Practices sp ON c.id = sp.country_id WHERE c.continent = 'Europe' AND sp.certification_date >= '2019-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1}
{"question": "What is the maximum duration of a space mission to Mars in days?", "schema": "CREATE TABLE SpaceMissions (MissionID INT, Name VARCHAR(50), LaunchDate DATE, Duration INT); INSERT INTO SpaceMissions VALUES (1, 'Curiosity', '2012-11-26', 669), (2, 'Perseverance', '2020-07-30', 342), (3, 'InSight', '2018-05-05', 913);", "sql": "SELECT MAX(Duration) FROM SpaceMissions WHERE Destination = 'Mars';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "Count the number of esports events in 2022", "schema": "CREATE TABLE Esports_Events (id INT, name VARCHAR(50), event_date DATE); INSERT INTO Esports_Events (id, name, event_date) VALUES (1, 'Dreamhack', '2022-01-01'), (2, 'ESL One', '2021-01-01'), (3, 'IEM', '2022-03-01');", "sql": "SELECT COUNT(*) FROM Esports_Events WHERE event_date BETWEEN '2022-01-01' AND '2022-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1}
{"question": "Find the number of founders who are women", "schema": "CREATE TABLE startup (id INT, name TEXT, founding_year INT, founder_gender TEXT);", "sql": "SELECT COUNT(*) FROM startup WHERE founder_gender = 'woman';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "PL/pgSQL test: Plpgsql Transaction (example 23).", "schema": null, "sql": "$$;\n\nCALL transaction_test5();\n\n\n-- SECURITY DEFINER currently disallow transaction statements\nCREATE PROCEDURE transaction_test5b()\nLANGUAGE plpgsql\nSECURITY DEFINER\nAS $$\nBEGIN\n COMMIT;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Transaction.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 190, "num_statements": 3}
{"question": "How many astronauts are there in total from India and Japan?", "schema": "CREATE TABLE Astronauts (astronaut_id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO Astronauts (astronaut_id, name, country) VALUES (1001, 'Rakesh Sharma', 'India'), (1002, 'Kalpana Chawla', 'US'), (1003, 'Sunita Williams', 'US'), (2001, 'Takao Doi', 'Japan'), (2002, 'Naoko Yamazaki', 'Japan');", "sql": "SELECT COUNT(*) FROM Astronauts WHERE country = 'India' OR country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1}
{"question": "Which vessels have a compliance score below 70 and have traveled to the Arctic Ocean?", "schema": "CREATE TABLE vessels (id INT, name TEXT, type TEXT, compliance_score INT);CREATE TABLE routes (id INT, vessel_id INT, destination TEXT, date DATE); INSERT INTO vessels (id, name, type, compliance_score) VALUES (1, 'VesselF', 'Cargo', 65); INSERT INTO routes (id, vessel_id, destination, date) VALUES (1, 1, 'Arctic', '2022-02-15');", "sql": "SELECT v.name FROM vessels v JOIN routes r ON v.id = r.vessel_id WHERE v.compliance_score < 70 AND r.destination = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1}
{"question": "What is the maximum billing amount for each attorney in the 'billing' table, grouped by their specialty?", "schema": "CREATE TABLE attorney (attorney_id INT, specialty VARCHAR(255)); INSERT INTO attorney (attorney_id, specialty) VALUES (1, 'Criminal Law'), (2, 'Family Law'), (3, 'Personal Injury'); CREATE TABLE billing (bill_id INT, attorney_id INT, amount DECIMAL(10,2)); INSERT INTO billing (bill_id, attorney_id, amount) VALUES (1, 1, 500.00), (2, 1, 750.00), (3, 2, 600.00), (4, 3, 800.00), (5, 3, 900.00);", "sql": "SELECT specialty, MAX(amount) FROM billing JOIN attorney ON billing.attorney_id = attorney.attorney_id GROUP BY specialty;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1}
{"question": "pgTAP test for Hastap (assertion 103).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_type( 'public'::name, '__foobarbaz__'::name ),\n true,\n 'hasnt_type(scheam, type)',\n 'Type public.__foobarbaz__ should not exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What duration is listed for Christian de la Fuente?", "schema": "CREATE TABLE table_11210576_3 (duration VARCHAR, actor VARCHAR)", "sql": "SELECT duration FROM table_11210576_3 WHERE actor = 'Christian de la Fuente';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the location code with the most documents?", "schema": "CREATE TABLE Document_locations (location_code VARCHAR)", "sql": "SELECT location_code FROM Document_locations GROUP BY location_code ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1}
{"question": "How many marine species are endangered in the Indian Ocean?", "schema": "CREATE TABLE marine_species (name TEXT, region TEXT, endangered BOOLEAN); INSERT INTO marine_species (name, region, endangered) VALUES ('Whale Shark', 'Indian Ocean', TRUE), ('Dugong', 'Indian Ocean', TRUE);", "sql": "SELECT COUNT(*) FROM marine_species WHERE region = 'Indian Ocean' AND endangered = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is Fiji's lowest total?", "schema": "CREATE TABLE table_name_63 (total INTEGER, country VARCHAR)", "sql": "SELECT MIN(total) FROM table_name_63 WHERE country = 'fiji';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'cube' (example 177).", "schema": null, "sql": "SELECT cube_enlarge('(0)'::cube, 1, 2);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 39, "num_statements": 1}
{"question": "Get the total area of the ocean floor mapped by region", "schema": "CREATE TABLE ocean_floor_mapping (mapping_id INT, region VARCHAR(255), area INT);", "sql": "SELECT region, SUM(area) FROM ocean_floor_mapping GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "What is the total number of employees working in the 'manufacturing' department, excluding any employees who also appear in the 'training' table?", "schema": "CREATE TABLE companies (company_id INT, department VARCHAR(20)); INSERT INTO companies (company_id, department) VALUES (1, 'manufacturing'), (2, 'HR'), (3, 'manufacturing'); CREATE TABLE employees (employee_id INT, company_id INT); CREATE TABLE training (employee_id INT, training VARCHAR(20)); INSERT INTO employees (employee_id, company_id) VALUES (1, 1), (2, 1), (3, 2); INSERT INTO training (employee_id, training) VALUES (1, 'welding'), (2, 'safety'), (3, 'safety');", "sql": "SELECT COUNT(*) FROM companies INNER JOIN employees ON companies.company_id = employees.company_id WHERE companies.department = 'manufacturing' AND employees.employee_id NOT IN (SELECT employee_id FROM training);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the score of the game played in the 2012 Africa Cup of Nations?", "schema": "CREATE TABLE table_name_90 (score VARCHAR, competition VARCHAR)", "sql": "SELECT score FROM table_name_90 WHERE competition = '2012 africa cup of nations';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What date was the score 3–3, and away team was Barnet?", "schema": "CREATE TABLE table_name_96 (date VARCHAR, score VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_96 WHERE score = '3–3' AND away_team = 'barnet';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 759).", "schema": null, "sql": "CREATE STATISTICS s_expr ON mod(id, 2), lower(col) FROM stats_ext_tbl;", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What year was the startup for the Project Named of taq taq ph 2?", "schema": "CREATE TABLE table_name_51 (year_startup VARCHAR, project_name VARCHAR)", "sql": "SELECT year_startup FROM table_name_51 WHERE project_name = 'taq taq ph 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "What is the maximum number of cases handled in a year by any mediation center?", "schema": "CREATE TABLE AnnualCasesHandled (ID INT, MediationCenter VARCHAR(50), Year INT, Cases INT); INSERT INTO AnnualCasesHandled (ID, MediationCenter, Year, Cases) VALUES (1, 'PeaceBuilders', 2005, 120), (2, 'CommunityHealers', 2010, 150), (3, 'HarmonyKeepers', 2008, 210), (4, 'UnityCreators', 2015, 200);", "sql": "SELECT MAX(Cases) FROM AnnualCasesHandled;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What date has a margin of victory of 1 stroke over Greg Kraft?", "schema": "CREATE TABLE table_name_21 (date VARCHAR, margin_of_victory VARCHAR, runner_s__up VARCHAR)", "sql": "SELECT date FROM table_name_21 WHERE margin_of_victory = '1 stroke' AND runner_s__up = 'greg kraft';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 6).", "schema": null, "sql": "SELECT * FROM test_bpchar WHERE i<='abc' ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'returning' (example 47).", "schema": null, "sql": "DELETE FROM foo WHERE f2 = 'zit' RETURNING *;", "explanation": "DML from PostgreSQL core regression test for Returning.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 45, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'hash_index' (example 67).", "schema": null, "sql": "INSERT INTO hash_split_heap SELECT a/2 FROM generate_series(1, 25000) a;", "explanation": "DML from PostgreSQL core regression test for Hash Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "What is the total value of all transactions on the Polygon network, grouped by month?", "schema": "CREATE TABLE polygon_transactions (transaction_id INT, tx_time TIMESTAMP, value DECIMAL(10, 2)); INSERT INTO polygon_transactions (transaction_id, tx_time, value) VALUES (1, '2022-01-01 10:00:00', 100), (2, '2022-01-02 11:00:00', 200), (3, '2022-01-03 12:00:00', 300), (4, '2022-01-04 13:00:00', 400), (5, '2022-01-05 14:00:00', 500);", "sql": "SELECT DATE_FORMAT(tx_time, '%Y-%m') AS month, SUM(value) AS total_value FROM polygon_transactions GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1}
{"question": "What is the maximum construction cost of public works projects in the 'Africa' continent?", "schema": "CREATE TABLE Projects (id INT, name TEXT, country TEXT, cost FLOAT); INSERT INTO Projects (id, name, country, cost) VALUES (1, 'ProjectA', 'CountryX', 2000000.00), (2, 'ProjectB', 'CountryY', 2500500.75), (3, 'ProjectC', 'CountryZ', 1800000.50), (4, 'ProjectD', 'CountryA', 3000000.00); CREATE TABLE Countries (id INT, name TEXT, continent TEXT); INSERT INTO Countries (id, name, continent) VALUES (1, 'CountryX', 'Africa'), (2, 'CountryY', 'Africa'), (3, 'CountryZ', 'Europe'), (4, 'CountryA', 'Africa');", "sql": "SELECT MAX(cost) FROM Projects INNER JOIN Countries ON Projects.country = Countries.name WHERE Countries.continent = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1}
{"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 713).", "schema": null, "sql": "SELECT to_char('100'::numeric, 'f\"\\\\ool\"999');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char('100'::numeric, 'f\"\\\\ool\"999')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1}
{"question": "What is the percentage of menu items that are vegetarian for each cuisine type?", "schema": "CREATE TABLE restaurants (id INT, name VARCHAR(50), cuisine VARCHAR(50), menu_item_id INT); CREATE TABLE menu_items (id INT, name VARCHAR(50), vegetarian BOOLEAN); INSERT INTO restaurants (id, name, cuisine, menu_item_id) VALUES (1, 'Restaurant A', 'Italian', 1), (2, 'Restaurant B', 'Mexican', 2), (3, 'Restaurant C', 'Chinese', 3); INSERT INTO menu_items (id, name, vegetarian) VALUES (1, 'Pizza Margherita', FALSE), (2, 'Tacos Al Pastor', FALSE), (3, 'Kung Pao Chicken', FALSE), (4, 'Vegetable Stir Fry', TRUE);", "sql": "SELECT r.cuisine, 100.0 * COUNT(CASE WHEN m.vegetarian THEN 1 END) / COUNT(*) as vegetarian_percentage FROM restaurants r JOIN menu_items m ON r.menu_item_id = m.id GROUP BY r.cuisine;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: For which tournament was the margin of victory 7 strokes?", "schema": "CREATE TABLE table_name_50 (tournament VARCHAR, margin_of_victory VARCHAR)", "sql": "SELECT tournament FROM table_name_50 WHERE margin_of_victory = '7 strokes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "What is the total number of basketball games played in the 2022 season?", "schema": "CREATE TABLE games (season TEXT, sport TEXT, total_games INT); INSERT INTO games (season, sport, total_games) VALUES ('2022', 'Basketball', 1230); INSERT INTO games (season, sport, total_games) VALUES ('2022', 'Football', 272);", "sql": "SELECT SUM(total_games) FROM games WHERE season = '2022' AND sport = 'Basketball';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "Find the average renewable energy production of each city in France.", "schema": "CREATE TABLE renewable_energy_france (id INT, city VARCHAR(255), production FLOAT); INSERT INTO renewable_energy_france (id, city, production) VALUES (1, 'Paris', 2000), (2, 'Lyon', 2500), (3, 'Marseille', 1500);", "sql": "SELECT city, AVG(production) FROM renewable_energy_france GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When was the earliest founded university?", "schema": "CREATE TABLE table_261941_1 (founded INTEGER)", "sql": "SELECT MIN(founded) FROM table_261941_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the B.P. of club Halifax?", "schema": "CREATE TABLE table_19179465_1 (bp VARCHAR, club VARCHAR)", "sql": "SELECT bp FROM table_19179465_1 WHERE club = 'Halifax';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 197).", "schema": null, "sql": "select jsonb_object('{a,b,c,\"d e f\"}','{1,2,3,\"a b c\",g}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_object('{a,b,c,\"d e f\"}','{1,2,3,\"a b c\",g}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What are the names of entrepreneurs?", "schema": "CREATE TABLE people (Name VARCHAR, People_ID VARCHAR); CREATE TABLE entrepreneur (People_ID VARCHAR)", "sql": "SELECT T2.Name FROM entrepreneur AS T1 JOIN people AS T2 ON T1.People_ID = T2.People_ID;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When the wallington new foresters are in division four what is the season?", "schema": "CREATE TABLE table_24575253_4 (season VARCHAR, division_four VARCHAR)", "sql": "SELECT season FROM table_24575253_4 WHERE division_four = 'Wallington New Foresters';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: List the rank of the player that left due to right wrist surgery?", "schema": "CREATE TABLE table_24431264_18 (rank VARCHAR, withdrew_due_to VARCHAR)", "sql": "SELECT rank FROM table_24431264_18 WHERE withdrew_due_to = 'right wrist surgery';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: where did arron oberholser play?", "schema": "CREATE TABLE table_name_84 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_84 WHERE player = 'arron oberholser';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "PostgreSQL regression test 'misc': Write the SELECT query (example 52).", "schema": null, "sql": "SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')));", "explanation": "Regression test for Misc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "What is the average temperature required for growing cotton?", "schema": "CREATE TABLE Crop (id INT, name VARCHAR(255), minimum_temperature INT, maximum_temperature INT); INSERT INTO Crop (id, name, minimum_temperature, maximum_temperature) VALUES (1, 'Cotton', 15, 30), (2, 'Soybean', 10, 35), (3, 'Corn', 5, 30);", "sql": "SELECT AVG(Crop.minimum_temperature + (Crop.maximum_temperature - Crop.minimum_temperature)/2) FROM Crop WHERE Crop.name = 'Cotton';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the highest total with a t12 finish?", "schema": "CREATE TABLE table_name_61 (total INTEGER, finish VARCHAR)", "sql": "SELECT MAX(total) FROM table_name_61 WHERE finish = 't12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "What is the total production capacity of all tanks located in the Western region?", "schema": "CREATE TABLE Production (tank VARCHAR(20), capacity INT, location VARCHAR(20)); INSERT INTO Production (tank, capacity, location) VALUES ('Tank7', 200000, 'Western'), ('Tank8', 250000, 'Western');", "sql": "SELECT SUM(capacity) FROM Production WHERE location = 'Western';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "What is the total investment in network infrastructure for each quarter of the year?", "schema": "CREATE TABLE infrastructure_investments (investment_date DATE, investment_amount DECIMAL(10,2)); INSERT INTO infrastructure_investments (investment_date, investment_amount) VALUES ('2021-01-01', 250000), ('2021-04-01', 300000), ('2021-07-01', 200000), ('2021-10-01', 350000);", "sql": "SELECT EXTRACT(QUARTER FROM investment_date) AS quarter, SUM(investment_amount) FROM infrastructure_investments GROUP BY quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1}
{"question": "What is the total Shariah-compliant loan amount issued per month?", "schema": "CREATE TABLE shariah_compliant_loans_over_time (id INT, loan_date DATE, amount FLOAT); INSERT INTO shariah_compliant_loans_over_time (id, loan_date, amount) VALUES (1, '2021-01-01', 350000), (2, '2021-02-01', 400000), (3, '2021-03-01', 450000), (4, '2021-01-01', 200000), (5, '2021-02-01', 300000);", "sql": "SELECT DATE_FORMAT(loan_date, '%Y-%m') as month, SUM(amount) as total_amount FROM shariah_compliant_loans_over_time GROUP BY month ORDER BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1}
{"question": "What is the total production quantity (in metric tons) of Holmium from the mine with the ID 1 for the year 2017?", "schema": "CREATE TABLE production (id INT, mine_id INT, year INT, element TEXT, production_quantity INT); INSERT INTO production (id, mine_id, year, element, production_quantity) VALUES (1, 1, 2017, 'Holmium', 100), (2, 2, 2017, 'Holmium', 150), (3, 3, 2017, 'Holmium', 200), (4, 1, 2017, 'Dysprosium', 200), (5, 2, 2017, 'Dysprosium', 250), (6, 3, 2017, 'Dysprosium', 300);", "sql": "SELECT SUM(production_quantity) FROM production WHERE mine_id = 1 AND year = 2017 AND element = 'Holmium';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Set 2 has a Date of 25 may, and a Set 3 of 21–25?", "schema": "CREATE TABLE table_name_45 (set_2 VARCHAR, date VARCHAR, set_3 VARCHAR)", "sql": "SELECT set_2 FROM table_name_45 WHERE date = '25 may' AND set_3 = '21–25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: what's the poles with pts being 81", "schema": "CREATE TABLE table_14139408_1 (poles VARCHAR, pts VARCHAR)", "sql": "SELECT poles FROM table_14139408_1 WHERE pts = '81';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1}
{"question": "PostgreSQL regression test 'join': Write the SELECT query (example 509).", "schema": null, "sql": "select p.* from\n (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k\n where p.k = 1 and p.k = 2;", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select p.* from\n (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k\n where p.k = 1 and p.k = 2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the to par of the player with a 68-67-75=210?", "schema": "CREATE TABLE table_name_99 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_99 WHERE score = '68-67-75=210';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: what is the minimum voted yes where percent no is 68.2", "schema": "CREATE TABLE table_120778_2 (voted_yes INTEGER, percent_no VARCHAR)", "sql": "SELECT MIN(voted_yes) FROM table_120778_2 WHERE percent_no = '68.2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "Which train stations have accessibility features?", "schema": "CREATE TABLE train_stations (station_id INT, station_name TEXT, is_accessible BOOLEAN); INSERT INTO train_stations (station_id, station_name, is_accessible) VALUES (1, 'Union Station', true), (2, 'City Hall', false), (3, 'Downtown Crossing', true);", "sql": "SELECT station_id, station_name, is_accessible FROM train_stations WHERE is_accessible = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: HOW MUCH WAS THE OVERALL FOR ERIK KARLSSON?", "schema": "CREATE TABLE table_11803648_17 (overall INTEGER)", "sql": "SELECT MIN(overall) FROM table_11803648_17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the rank of Manuel Cortina Martínez?", "schema": "CREATE TABLE table_name_46 (rank VARCHAR, athletes VARCHAR)", "sql": "SELECT rank FROM table_name_46 WHERE athletes = 'manuel cortina martínez';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many episodes had their first air date on March 6, 2008?", "schema": "CREATE TABLE table_11220799_2 (episode_titles VARCHAR, first_air_date VARCHAR)", "sql": "SELECT COUNT(episode_titles) FROM table_11220799_2 WHERE first_air_date = 'March 6, 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1}
{"question": "What is the maximum number of military personnel deployed by African Union in peacekeeping operations?", "schema": "CREATE SCHEMA if not exists defense; CREATE TABLE if not exists african_union_pk_operations (id INT PRIMARY KEY, year INT, military_personnel INT); INSERT INTO african_union_pk_operations (id, year, military_personnel) VALUES (1, 2018, 3000), (2, 2019, 3500), (3, 2020, 4000), (4, 2021, 4500);", "sql": "SELECT MAX(military_personnel) FROM defense.african_union_pk_operations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "Which players from the 'players' table have the highest average scores in the 'scores' table, and how many high scores did they achieve?", "schema": "CREATE TABLE players (player_id INT, name VARCHAR(50)); INSERT INTO players VALUES (1, 'John'); INSERT INTO players VALUES (2, 'Jane'); CREATE TABLE scores (score_id INT, player_id INT, score INT); INSERT INTO scores VALUES (1, 1, 90); INSERT INTO scores VALUES (2, 1, 95); INSERT INTO scores VALUES (3, 2, 85); INSERT INTO scores VALUES (4, 2, 88);", "sql": "SELECT p.name, AVG(s.score) as avg_score, COUNT(*) as high_scores FROM players p JOIN scores s ON p.player_id = s.player_id WHERE s.score >= (SELECT AVG(score) FROM scores) GROUP BY p.player_id ORDER BY avg_score DESC, high_scores DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 236, "num_statements": 1}
{"question": "What is the average number of green buildings per state in the 'green_buildings' table?", "schema": "CREATE TABLE green_buildings (state VARCHAR(255), building_type VARCHAR(255));", "sql": "SELECT AVG(cnt) FROM (SELECT state, COUNT(*) AS cnt FROM green_buildings GROUP BY state) AS state_building_counts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many people wrote episode number 2 of the season?", "schema": "CREATE TABLE table_25737761_4 (writer VARCHAR, _number VARCHAR)", "sql": "SELECT COUNT(writer) FROM table_25737761_4 WHERE _number = '2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1}
{"question": "Who are the genetic researchers working on gene editing techniques in the UK?", "schema": "CREATE SCHEMA if not exists genetics; CREATE TABLE if not exists genetics.researchers (id INT, name VARCHAR(100), country VARCHAR(50), expertise VARCHAR(50)); INSERT INTO genetics.researchers (id, name, country, expertise) VALUES (1, 'John Doe', 'UK', 'CRISPR'); INSERT INTO genetics.researchers (id, name, country, expertise) VALUES (2, 'Jane Smith', 'US', 'CRISPR');", "sql": "SELECT name FROM genetics.researchers WHERE country = 'UK' AND expertise = 'CRISPR';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many 2006 subscribers are named Vodafone?", "schema": "CREATE TABLE table_29395291_2 (subscribers__2006___thousands_ VARCHAR, provider VARCHAR)", "sql": "SELECT COUNT(subscribers__2006___thousands_) FROM table_29395291_2 WHERE provider = 'Vodafone';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the party where the constituency is 10. Tindivanam?", "schema": "CREATE TABLE table_22753245_1 (party VARCHAR, constituency VARCHAR)", "sql": "SELECT party FROM table_22753245_1 WHERE constituency = '10. Tindivanam';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is Ips-provectus transmittance/contrast ratio?", "schema": "CREATE TABLE table_name_16 (transmittance__contrast_ratio VARCHAR, name VARCHAR)", "sql": "SELECT transmittance__contrast_ratio FROM table_name_16 WHERE name = 'ips-provectus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "What is the minimum number of visits for any exhibition?", "schema": "CREATE TABLE ExhibitionStats (exhibition_id INT, min_visits INT, max_visits INT); INSERT INTO ExhibitionStats (exhibition_id, min_visits, max_visits) VALUES (1, 1000, 2000), (2, 1500, 2500), (3, 2000, 3000);", "sql": "SELECT MIN(min_visits) FROM ExhibitionStats;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1}
{"question": "What is the total volume of freight forwarded from Japan to Singapore?", "schema": "CREATE TABLE Singapore_Freight (id INT, origin_country VARCHAR(50), destination_country VARCHAR(50), volume FLOAT); INSERT INTO Singapore_Freight (id, origin_country, destination_country, volume) VALUES (1, 'Japan', 'Singapore', 123.5), (2, 'Japan', 'Singapore', 234.6), (3, 'Malaysia', 'Singapore', 345.7);", "sql": "SELECT SUM(volume) FROM Singapore_Freight WHERE origin_country = 'Japan' AND destination_country = 'Singapore';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1}
{"question": "Write the PL/pgSQL object from PostgreSQL regression test 'strings' (example 83).", "schema": null, "sql": "--\n-- test SQL string functions\n-- E### and T### are feature reference numbers from SQL99\n--\n\n-- E021-09 trim function\nSELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS \"bunch o blanks\";", "explanation": "PL/pgSQL object from PostgreSQL core test for Strings.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Cultural and Educational Panel has a Labour Panel larger than 5, and an Industrial and Commercial Panel larger than 9?", "schema": "CREATE TABLE table_name_9 (cultural_and_educational_panel VARCHAR, labour_panel VARCHAR, industrial_and_commercial_panel VARCHAR)", "sql": "SELECT COUNT(cultural_and_educational_panel) FROM table_name_9 WHERE labour_panel > 5 AND industrial_and_commercial_panel > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the result for the Green-Communist party when the Left Bloc has 3.0%?", "schema": "CREATE TABLE table_name_87 (green_communist VARCHAR, left_bloc VARCHAR)", "sql": "SELECT green_communist FROM table_name_87 WHERE left_bloc = '3.0%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "What is the total recycling rate for each region in the year 2020?", "schema": "CREATE TABLE RecyclingAmount (region VARCHAR(50), year INT, amount FLOAT); INSERT INTO RecyclingAmount (region, year, amount) VALUES ('Region1', 2018, 600000.0), ('Region1', 2019, 700000.0), ('Region1', 2020, 800000.0), ('Region2', 2018, 500000.0), ('Region2', 2019, 600000.0), ('Region2', 2020, 700000.0);", "sql": "SELECT r.region, (SUM(r.amount) / (SELECT SUM(wg.amount) FROM WasteGeneration wg WHERE wg.year = 2020 AND EXISTS (SELECT 1 FROM RecyclingCenters rc WHERE rc.region = wg.city)) * 100) FROM RecyclingAmount r WHERE r.year = 2020 GROUP BY r.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 244, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What are the average points that have december 27?", "schema": "CREATE TABLE table_name_36 (points INTEGER, december VARCHAR)", "sql": "SELECT AVG(points) FROM table_name_36 WHERE december = '27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Which countries in Africa have a carbon pricing (in USD/ton) that is lower than the average for the continent?", "schema": "CREATE TABLE africa_carbon_pricing (id INT, country VARCHAR(50), price FLOAT); INSERT INTO africa_carbon_pricing (id, country, price) VALUES (1, 'South Africa', 10.5), (2, 'Egypt', 15.2), (3, 'Nigeria', 5.1);", "sql": "SELECT country, price FROM africa_carbon_pricing WHERE price < (SELECT AVG(price) FROM africa_carbon_pricing);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1}
{"question": "How many rural hospitals are in the \"rural_hospitals_2\" table?", "schema": "CREATE TABLE rural_hospitals_2 (id INT, name TEXT, location TEXT, capacity INT); INSERT INTO rural_hospitals_2 (id, name, location, capacity) VALUES (1, 'Hospital C', 'City3', 75), (2, 'Hospital D', 'City4', 60);", "sql": "SELECT COUNT(*) FROM rural_hospitals_2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1}
{"question": "What is the total revenue generated by ticket sales for each month and team?", "schema": "CREATE TABLE monthly_ticket_sales (ticket_id INT, team_id INT, date DATE, price INT);", "sql": "SELECT EXTRACT(MONTH FROM date) as month, team_id, SUM(price) as total_revenue FROM monthly_ticket_sales GROUP BY month, team_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: what is the tries against where points is 60?", "schema": "CREATE TABLE table_12828723_4 (tries_against VARCHAR, points VARCHAR)", "sql": "SELECT tries_against FROM table_12828723_4 WHERE points = '60';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the minimum laid down?", "schema": "CREATE TABLE table_12592074_1 (laid_down INTEGER)", "sql": "SELECT MIN(laid_down) FROM table_12592074_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many states are there?", "schema": "CREATE TABLE area_code_state (Id VARCHAR)", "sql": "SELECT COUNT(*) FROM area_code_state;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1}
{"question": "How many unique fans reside in 'FL' and have an average ticket spending of over $50 in the 'fan_demographics' and 'ticket_sales' tables?", "schema": "CREATE TABLE fan_demographics (fan_id INT, age INT, state VARCHAR(2)); CREATE TABLE ticket_sales (ticket_id INT, fan_id INT, event_id INT, price DECIMAL(5,2));", "sql": "SELECT COUNT(DISTINCT fan_id) FROM fan_demographics fd JOIN ticket_sales ts ON fd.fan_id = ts.fan_id WHERE fd.state = 'FL' AND (ts.price / (SELECT COUNT(*) FROM ticket_sales ts2 WHERE ts.fan_id = ts2.fan_id)) > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Where did Footscray play as the away team?", "schema": "CREATE TABLE table_name_27 (venue VARCHAR, away_team VARCHAR)", "sql": "SELECT venue FROM table_name_27 WHERE away_team = 'footscray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: The wildcats belong to what school?", "schema": "CREATE TABLE table_name_38 (school VARCHAR, team VARCHAR)", "sql": "SELECT school FROM table_name_38 WHERE team = 'wildcats';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1}
{"question": "What is the oldest chemical still in production?", "schema": "CREATE TABLE chemical_lifecycle (id INT PRIMARY KEY, chemical_name VARCHAR(255), year_introduced INT, production_status VARCHAR(255)); INSERT INTO chemical_lifecycle (id, chemical_name, year_introduced, production_status) VALUES (1, 'Hydrochloric Acid', 1950, 'Produced'); INSERT INTO chemical_lifecycle (id, chemical_name, year_introduced, production_status) VALUES (2, 'Sodium Hydroxide', 1980, 'Discontinued');", "sql": "SELECT chemical_name, MIN(year_introduced) AS first_introduced FROM chemical_lifecycle WHERE production_status = 'Produced' GROUP BY chemical_name ORDER BY first_introduced;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'spgist' (example 1).", "schema": null, "sql": "create table spgist_point_tbl(id int4, p point);", "explanation": "DDL from PostgreSQL core regression test for Spgist.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1}
{"question": "Find the average age and total salary of employees in the 'mining_operations' table, for employees with the job_title 'Engineer'?", "schema": "CREATE TABLE employees (id INT, first_name VARCHAR(50), last_name VARCHAR(50), job_title VARCHAR(50), department VARCHAR(50), age INT, salary DECIMAL(10,2), PRIMARY KEY (id)); INSERT INTO employees (id, first_name, last_name, job_title, department, age, salary) VALUES (1, 'John', 'Doe', 'Engineer', 'Mining', 35, 80000.00), (2, 'Jane', 'Doe', 'Operator', 'Mining', 28, 60000.00), (3, 'Mike', 'Johnson', 'Manager', 'Environment', 45, 90000.00);", "sql": "SELECT AVG(age), AVG(salary) FROM employees WHERE job_title = 'Engineer';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "What is the total quantity of resources depleted in each region for the past 5 years?", "schema": "CREATE TABLE resources (id INT, region TEXT, quantity FLOAT); CREATE TABLE depletions (resource_id INT, year INT, quantity FLOAT); INSERT INTO resources (id, region, quantity) VALUES (1, 'Region A', 50000.0), (2, 'Region B', 60000.0); INSERT INTO depletions (resource_id, year, quantity) VALUES (1, 2017, 5000.0), (1, 2018, 5500.0), (1, 2019, 6000.0), (1, 2020, 6500.0), (1, 2021, 7000.0), (2, 2017, 6000.0), (2, 2018, 6500.0), (2, 2019, 7000.0), (2, 2020, 7500.0), (2, 2021, 8000.0);", "sql": "SELECT resources.region, SUM(depletions.quantity) FROM resources INNER JOIN depletions ON resources.id = depletions.resource_id WHERE depletions.year BETWEEN 2017 AND 2021 GROUP BY resources.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1}
{"question": "What is the trend in mental health parity violations for each state over the past four years?", "schema": "CREATE TABLE mental_health_parity_trend (state VARCHAR(2), year INT, violations INT); INSERT INTO mental_health_parity_trend (state, year, violations) VALUES ('CA', 2018, 10), ('CA', 2019, 15), ('CA', 2020, 20), ('CA', 2021, 25), ('NY', 2018, 15), ('NY', 2019, 20), ('NY', 2020, 25), ('NY', 2021, 30), ('TX', 2018, 5), ('TX', 2019, 10), ('TX', 2020, 15), ('TX', 2021, 20);", "sql": "SELECT m.state, m.year, m.violations, LAG(m.violations) OVER (PARTITION BY m.state ORDER BY m.year) as prev_year_violations FROM mental_health_parity_trend m;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 158, "num_statements": 1}
{"question": "What is the total energy consumption per mining company, and the average energy consumption per mining operation?", "schema": "CREATE TABLE mining_companies (company_id INT, company_name TEXT); INSERT INTO mining_companies (company_id, company_name) VALUES (1001, 'Mining Corp A'), (1002, 'Mining Corp B'), (1003, 'Mining Corp C'); CREATE TABLE mining_operations (operation_id INT, operation_name TEXT, country TEXT, energy_consumption FLOAT, company_id INT); INSERT INTO mining_operations (operation_id, operation_name, country, energy_consumption, company_id) VALUES (1, 'Porgera Mine', 'Papua New Guinea', 20000, 1001), (2, 'Cerro Verde Mine', 'Peru', 30000, 1001), (3, 'Gruyere Mine', 'Australia', 40000, 1002), (4, 'Veladero Mine', 'Argentina', 50000, 1002), (5, 'Kidd Mine', 'Canada', 60000, 1003), (6, 'Ducktown Mine', 'USA', 70000, 1003);", "sql": "SELECT mining_companies.company_name, SUM(mining_operations.energy_consumption) AS total_energy_consumption FROM mining_companies JOIN mining_operations ON mining_companies.company_id = mining_operations.company_id GROUP BY mining_companies.company_name; SELECT AVG(energy_consumption) AS average_energy_consumption FROM mining_operations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 339, "num_statements": 2}
{"question": "What is the minimum and maximum water pH for each tank in the 'tank_data' table?", "schema": "CREATE TABLE tank_data (tank_id INT, species VARCHAR(255), water_ph DECIMAL(5,2)); INSERT INTO tank_data (tank_id, species, water_ph) VALUES (1, 'Tilapia', 7.5), (2, 'Salmon', 6.0), (3, 'Tilapia', 7.8), (4, 'Catfish', 7.2), (5, 'Salmon', 6.5);", "sql": "SELECT tank_id, MIN(water_ph) as min_ph, MAX(water_ph) as max_ph FROM tank_data GROUP BY tank_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1}
{"question": "What is the yearly growth rate in revenue for the \"hip-hop\" genre in the Asian region?", "schema": "CREATE TABLE YearlyRevenue(id INT, genre VARCHAR(10), region VARCHAR(10), revenue FLOAT, year INT);", "sql": "SELECT (SUM(revenue) OVER (PARTITION BY genre ORDER BY year ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) - SUM(revenue))/SUM(revenue) OVER (PARTITION BY genre) AS yearly_growth_rate FROM YearlyRevenue WHERE genre = 'hip-hop' AND region = 'Asian';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 251, "num_statements": 1}
{"question": "What is the average sales figure for drugs that were approved by the EMA and contain the word 'Plus' in their name?", "schema": "CREATE TABLE drug (id INT, name TEXT, approval_authority TEXT, sales FLOAT); INSERT INTO drug (id, name, approval_authority, sales) VALUES (1, 'DrugA Plus', 'EMA', 30000000); INSERT INTO drug (id, name, approval_authority, sales) VALUES (2, 'DrugB', 'EMA', 40000000); INSERT INTO drug (id, name, approval_authority, sales) VALUES (3, 'PlusDrug', 'EMA', 50000000);", "sql": "SELECT AVG(sales) FROM drug WHERE approval_authority = 'EMA' AND name LIKE '%Plus%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1}
{"question": "What is the average CO2 emission of each textile mill, grouped by country and displayed in ascending order based on the emission amount?", "schema": "CREATE TABLE CO2Emission (mill TEXT, country TEXT, co2_kg FLOAT); INSERT INTO CO2Emission (mill, country, co2_kg) VALUES ('MillA', 'China', 120.5), ('MillB', 'Bangladesh', 80.3), ('MillC', 'Vietnam', 160.1), ('MillD', 'India', 95.6);", "sql": "SELECT country, AVG(co2_kg) as avg_co2 FROM CO2Emission GROUP BY country ORDER BY avg_co2 ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the location that was founded 1798", "schema": "CREATE TABLE table_2293402_2 (location VARCHAR, founded VARCHAR)", "sql": "SELECT location FROM table_2293402_2 WHERE founded = 1798;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "What is the percentage of Indigenous employees in the Mining department?", "schema": "CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Department VARCHAR(50), Gender VARCHAR(50), Community VARCHAR(50)); INSERT INTO Employees (EmployeeID, Name, Department, Gender, Community) VALUES (8, 'Emily Brown', 'Mining', 'Female', 'Indigenous'); INSERT INTO Employees (EmployeeID, Name, Department, Gender, Community) VALUES (9, 'Michael White', 'Mining', 'Male', 'Indigenous');", "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Employees WHERE Department = 'Mining')) AS Percentage FROM Employees WHERE Department = 'Mining' AND Community = 'Indigenous';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1}
{"question": "What is the distribution of energy sources in rural areas?", "schema": "CREATE TABLE energy_sources (location VARCHAR(50), source VARCHAR(50), percentage FLOAT);", "sql": "SELECT source, SUM(percentage) AS percentage FROM energy_sources WHERE location = 'rural' GROUP BY source;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1}
{"question": "What is the market share of electric vehicles in India?", "schema": "CREATE TABLE VehicleSales (Type VARCHAR(50), Country VARCHAR(50), Sales INT); INSERT INTO VehicleSales (Type, Country, Sales) VALUES ('Electric', 'India', 50000), ('Gasoline', 'India', 2000000), ('Diesel', 'India', 1500000);", "sql": "SELECT (Sales * 100.0 / (SELECT SUM(Sales) FROM VehicleSales WHERE Country = 'India')) FROM VehicleSales WHERE Type = 'Electric' AND Country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the total sum of 50m splits for josefin lillhage in lanes above 8?", "schema": "CREATE TABLE table_name_24 (split__50m_ INTEGER, name VARCHAR, lane VARCHAR)", "sql": "SELECT SUM(split__50m_) FROM table_name_24 WHERE name = 'josefin lillhage' AND lane > 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "Delete records in the CargoTable that have a weight of zero", "schema": "CREATE TABLE CargoTable (CargoId INT PRIMARY KEY, VesselId INT, CargoName VARCHAR(50), Weight INT);", "sql": "DELETE FROM CargoTable WHERE Weight = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1}
{"question": "Number of public schools in each region of Brazil in 2021.", "schema": "CREATE TABLE schools (id INT, name VARCHAR(50), region VARCHAR(50), country VARCHAR(50), year INT); INSERT INTO schools (id, name, region, country, year) VALUES (1, 'Escola Estadual', 'Sudeste', 'Brazil', 2021), (2, 'Colégio Municipal', 'Nordeste', 'Brazil', 2021), (3, 'Escola Municipal', 'Centro-Oeste', 'Brazil', 2021), (4, 'Escola Federal', 'Norte', 'Brazil', 2021), (5, 'Escola Particular', 'Sul', 'Brazil', 2021);", "sql": "SELECT region, COUNT(*) FROM schools WHERE country = 'Brazil' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What's the sum of swing to gain with a winning party 2007 of Conservative with a rank smaller than 5?", "schema": "CREATE TABLE table_name_70 (swing_to_gain INTEGER, winning_party_2007 VARCHAR, rank VARCHAR)", "sql": "SELECT SUM(swing_to_gain) FROM table_name_70 WHERE winning_party_2007 = 'conservative' AND rank < 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1}
{"question": "Get the total energy storage capacity (MWh) in France", "schema": "CREATE TABLE energy_storage (id INT, country VARCHAR(50), capacity FLOAT); INSERT INTO energy_storage (id, country, capacity) VALUES (1, 'United Kingdom', 3000), (2, 'Germany', 4000), (3, 'France', 2500);", "sql": "SELECT SUM(capacity) FROM energy_storage WHERE country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Which countries participated in space exploration missions to Venus?", "schema": "CREATE TABLE Venus_Missions (Mission_ID INT, Mission_Name VARCHAR(50), Country VARCHAR(50), Launch_Year INT, PRIMARY KEY (Mission_ID)); INSERT INTO Venus_Missions (Mission_ID, Mission_Name, Country, Launch_Year) VALUES (1, 'Venera 7', 'Soviet Union', 1970), (2, 'Magellan', 'United States', 1989), (3, 'Akatsuki', 'Japan', 2010);", "sql": "SELECT DISTINCT Country FROM Venus_Missions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1}
{"question": "How many unique customers prefer cruelty-free cosmetics and have made a purchase in the last month?", "schema": "CREATE TABLE sales (sale_id INT, customer_id INT, product_id INT, sale_date DATE); INSERT INTO sales (sale_id, customer_id, product_id, sale_date) VALUES (1, 1001, 1, '2022-03-05'), (2, 1002, 2, '2022-03-06'), (3, 1003, 3, '2022-03-07'), (4, 1004, 4, '2022-03-08'), (5, 1001, 5, '2022-04-01'), (6, 1005, 1, '2022-04-02'); CREATE TABLE consumer_preferences (preference_id INT, customer_id INT, preference VARCHAR(255)); INSERT INTO consumer_preferences (preference_id, customer_id, preference) VALUES (1, 1001, 'Cruelty-free'), (2, 1002, 'Vegan'), (3, 1003, 'Natural'), (4, 1004, 'Cruelty-free'), (5, 1005, 'Cruelty-free');", "sql": "SELECT COUNT(DISTINCT sales.customer_id) FROM sales JOIN consumer_preferences ON sales.customer_id = consumer_preferences.customer_id WHERE preference = 'Cruelty-free' AND sale_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 1158).", "schema": null, "sql": "CREATE CONSTRAINT TRIGGER trig_del_fk_parted AFTER DELETE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who was the leading scorer on the game played on February 25?", "schema": "CREATE TABLE table_name_28 (leading_scorer VARCHAR, date VARCHAR)", "sql": "SELECT leading_scorer FROM table_name_28 WHERE date = 'february 25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "Which museums have the highest number of modern art pieces?", "schema": "CREATE TABLE Museums (id INT, name VARCHAR(50), type VARCHAR(50)); INSERT INTO Museums (id, name, type) VALUES (1, 'Metropolitan Museum', 'Art'), (2, 'Natural History Museum', 'Science'); CREATE TABLE ArtPieces (id INT, title VARCHAR(50), museumId INT, artType VARCHAR(50)); INSERT INTO ArtPieces (id, title, museumId, artType) VALUES (1, 'Mona Lisa', 1, 'Modern'), (2, 'Starry Night', 1, 'Modern'), (3, 'Dinosaur Fossil', 2, 'Ancient');", "sql": "SELECT Museums.name FROM Museums JOIN ArtPieces ON Museums.id = ArtPieces.museumId WHERE ArtPieces.artType = 'Modern' GROUP BY Museums.name ORDER BY COUNT(*) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1}
{"question": "Update the genre of the artist 'Selena Gomez' to 'Pop-Folk'", "schema": "CREATE TABLE Artists (ArtistID INT PRIMARY KEY, ArtistName VARCHAR(100), Genre VARCHAR(50));", "sql": "UPDATE Artists SET Genre = 'Pop-Folk' WHERE ArtistName = 'Selena Gomez';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "List the community education programs that have received more than 500 donations.", "schema": "CREATE TABLE if NOT EXISTS community_education (program_id INT, program_name VARCHAR(50), donation_count INT); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (1, 'Wildlife Conservation 101', 500); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (2, 'Endangered Species Awareness', 300); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (3, 'Habitat Protection Techniques', 700);", "sql": "SELECT program_name FROM community_education WHERE donation_count > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "Add new mobile_subscribers from historically underrepresented communities.", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, data_usage FLOAT, community_representation VARCHAR(30)); INSERT INTO mobile_subscribers (subscriber_id, data_usage, community_representation) VALUES (5, 35.6, 'Latinx'), (6, 45.7, 'Black/African American'), (7, 55.8, 'Native American'), (8, 65.9, 'Asian');", "sql": "INSERT INTO mobile_subscribers (subscriber_id, data_usage, community_representation) VALUES (9, 75.1, 'Pacific Islander'), (10, 85.2, 'Two or More Races'), (11, 95.3, 'Decline to State'), (12, 105.4, 'Not Hispanic or Latino');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1}
{"question": "Create a table named 'sales_tax' with columns 'region' and 'tax_percentage'", "schema": "CREATE TABLE sales_tax (region VARCHAR(50), tax_percentage DECIMAL(5,2));", "sql": "CREATE TABLE sales_tax (region VARCHAR(50), tax_percentage DECIMAL(5,2));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "Add a new autonomous bus to the public transportation fleet in Vancouver.", "schema": "CREATE TABLE public_transportation (transport_id INT, type VARCHAR(20), city VARCHAR(20)); INSERT INTO public_transportation (transport_id, type, city) VALUES (1, 'Bus', 'Vancouver'), (2, 'Tram', 'Vancouver'), (3, 'Train', 'Vancouver');", "sql": "INSERT INTO public_transportation (transport_id, type, city) VALUES (4, 'Autonomous Bus', 'Vancouver');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which CFL team did the player from British Columbia get drafted to", "schema": "CREATE TABLE table_16441561_5 (cfl_team VARCHAR, college VARCHAR)", "sql": "SELECT cfl_team FROM table_16441561_5 WHERE college = 'British Columbia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the sum of bronzes for teams with more than 0 silver and a total under 1?", "schema": "CREATE TABLE table_name_22 (bronze INTEGER, silver VARCHAR, total VARCHAR)", "sql": "SELECT SUM(bronze) FROM table_name_22 WHERE silver > 0 AND total < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Where does CF Pachuca play?", "schema": "CREATE TABLE table_name_64 (game_site VARCHAR, opponent VARCHAR)", "sql": "SELECT game_site FROM table_name_64 WHERE opponent = 'cf pachuca';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the order for a red list of 7 in the didelphidae family?", "schema": "CREATE TABLE table_name_91 (order VARCHAR, red_list VARCHAR, family VARCHAR)", "sql": "SELECT order FROM table_name_91 WHERE red_list = 7 AND family = 'didelphidae';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1}
{"question": "Show the number of workers in each department by gender for the past month.", "schema": "CREATE TABLE Workforce (ID INT, Department VARCHAR(255), Gender VARCHAR(255), HireDate DATE); INSERT INTO Workforce (ID, Department, Gender, HireDate) VALUES (1, 'Mining', 'Male', '2021-12-01'), (2, 'Mining', 'Male', '2021-11-01'), (3, 'Mining', 'Female', '2021-10-01'), (4, 'Maintenance', 'Male', '2021-12-01'), (5, 'Maintenance', 'Female', '2021-11-01'), (6, 'Maintenance', 'Male', '2021-10-01'), (7, 'Environment', 'Female', '2021-12-01'), (8, 'Environment', 'Female', '2021-11-01'), (9, 'Environment', 'Male', '2021-10-01'), (10, 'Safety', 'Male', '2021-12-01'), (11, 'Safety', 'Female', '2021-11-01'), (12, 'Safety', 'Male', '2021-10-01');", "sql": "SELECT Department, Gender, COUNT(*) as Number_of_Workers FROM Workforce WHERE HireDate >= DATEADD(MONTH, -1, GETDATE()) GROUP BY Department, Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1}
{"question": "What are the names of all chemical compounds that have been used in the production of a hazardous product in the past 6 months?", "schema": "CREATE TABLE chemical_compounds (compound_id INT, name TEXT); CREATE TABLE product_compounds (compound_id INT, product_id INT); CREATE TABLE products (product_id INT, hazardous_flag BOOLEAN, production_date DATE);", "sql": "SELECT chemical_compounds.name FROM chemical_compounds INNER JOIN product_compounds ON chemical_compounds.compound_id = product_compounds.compound_id INNER JOIN products ON product_compounds.product_id = products.product_id WHERE products.hazardous_flag = TRUE AND products.production_date > DATEADD(month, -6, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 322, "num_statements": 1}
{"question": "List all exploration projects in the 'Asia-Pacific' region, along with their start dates.", "schema": "CREATE TABLE exploration_projects (project_id INT, project_name VARCHAR(50), region VARCHAR(50), start_date DATE); INSERT INTO exploration_projects (project_id, project_name, region, start_date) VALUES (1, 'Project X', 'Asia-Pacific', '2020-01-01');", "sql": "SELECT project_name, start_date FROM exploration_projects WHERE region = 'Asia-Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "What is the total water consumption by all sectors in 2015 and 2016?", "schema": "CREATE TABLE total_consumption (year INT, sector TEXT, consumption FLOAT); INSERT INTO total_consumption (year, sector, consumption) VALUES (2015, 'residential', 123.5), (2015, 'commercial', 234.6), (2016, 'residential', 130.2), (2016, 'commercial', 240.1);", "sql": "SELECT consumption FROM total_consumption WHERE year IN (2015, 2016);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Site when the Result was w52-0?", "schema": "CREATE TABLE table_name_92 (site VARCHAR, result VARCHAR)", "sql": "SELECT site FROM table_name_92 WHERE result = 'w52-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1}
{"question": "What is the total CO2 emissions for each mining operation in the past 6 months, ordered by the most emitting operation?", "schema": "CREATE TABLE mining_operations (id INT, name TEXT, co2_emissions INT, operation_date DATE); INSERT INTO mining_operations (id, name, co2_emissions, operation_date) VALUES (1, 'Operation X', 12000, '2021-07-01'), (2, 'Operation Y', 15000, '2021-07-01'), (3, 'Operation Z', 18000, '2021-07-01');", "sql": "SELECT name, SUM(co2_emissions) FROM mining_operations WHERE operation_date >= DATEADD(month, -6, GETDATE()) GROUP BY name ORDER BY SUM(co2_emissions) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the highest overall pick that has c as the position, with an NFL Draft greater than 1977?", "schema": "CREATE TABLE table_name_27 (overall_pick INTEGER, position VARCHAR, nfl_draft VARCHAR)", "sql": "SELECT MAX(overall_pick) FROM table_name_27 WHERE position = 'c' AND nfl_draft > 1977;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1}
{"question": "List the mining sites in the 'Asia-Pacific' region with environmental impact scores above 85.", "schema": "CREATE TABLE mining_sites (id INT, site_name VARCHAR(50), location VARCHAR(50), environmental_score FLOAT); INSERT INTO mining_sites (id, site_name, location, environmental_score) VALUES (1, 'Site A', 'Australia', 82.50);", "sql": "SELECT site_name, environmental_score FROM mining_sites WHERE location LIKE 'Asia-Pacific' AND environmental_score > 85.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 311).", "schema": null, "sql": "INSERT INTO temporal_mltrng (id, valid_at) VALUES ('[2,3)', datemultirange(daterange('2005-01-01', '2006-01-01'))) ON CONFLICT DO NOTHING;", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the name of team 1 that was after the 2005 season and with a 4-2 score?", "schema": "CREATE TABLE table_name_11 (team_1 VARCHAR, season VARCHAR, score VARCHAR)", "sql": "SELECT team_1 FROM table_name_11 WHERE season > 2005 AND score = '4-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What team was he on when he finished in 11th position?", "schema": "CREATE TABLE table_24491017_1 (team VARCHAR, position VARCHAR)", "sql": "SELECT team FROM table_24491017_1 WHERE position = '11th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What To par scored 72-71-68=211?", "schema": "CREATE TABLE table_name_29 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_29 WHERE score = 72 - 71 - 68 = 211;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "How many deep-sea expeditions were conducted by country?'", "schema": "CREATE TABLE deep_sea_expeditions (expedition_id INT, country VARCHAR(50), year INT);", "sql": "SELECT country, COUNT(expedition_id) AS num_expeditions FROM deep_sea_expeditions GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1}
{"question": "How many fans attended football games in Los Angeles during 2022?", "schema": "CREATE TABLE games (team VARCHAR(255), city VARCHAR(255), date DATE, attendance INT); INSERT INTO games (team, city, date, attendance) VALUES ('LA Rams', 'Los Angeles', '2022-09-08', 70000), ('LA Chargers', 'Los Angeles', '2022-09-11', 65000);", "sql": "SELECT SUM(attendance) FROM games WHERE city = 'Los Angeles' AND YEAR(date) = 2022 AND sport = 'Football';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1}
{"question": "How many 'public' investors have made investments in the 'healthcare' sector?", "schema": "CREATE TABLE investors (investor_id INT, investor_name VARCHAR(30), investor_type VARCHAR(20)); CREATE TABLE investments (investment_id INT, investor_id INT, sector_id INT);", "sql": "SELECT COUNT(*) FROM investments i INNER JOIN investors j ON i.investor_id = j.investor_id WHERE j.investor_type = 'public' AND i.sector_id IN (SELECT sector_id FROM sectors WHERE sector_name = 'healthcare');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1}
{"question": "PostgreSQL regression test 'text': Write the SELECT query (example 29).", "schema": null, "sql": "select format('Hello %%');", "explanation": "Regression test for Text in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select format('Hello %%')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the score when the away team was norwich city?", "schema": "CREATE TABLE table_24887326_8 (score_1 VARCHAR, away_team VARCHAR)", "sql": "SELECT score_1 FROM table_24887326_8 WHERE away_team = 'Norwich City';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the result of the game from September 10, 2000?", "schema": "CREATE TABLE table_name_68 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_68 WHERE date = 'september 10, 2000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "What is the average distance traveled per day by all electric vehicles in the city of Seattle?", "schema": "CREATE TABLE ElectricVehicles (id INT, make VARCHAR(50), model VARCHAR(50), daily_distance FLOAT, city VARCHAR(50)); INSERT INTO ElectricVehicles (id, make, model, daily_distance, city) VALUES (1, 'Tesla', 'Model 3', 45.6, 'Seattle'); INSERT INTO ElectricVehicles (id, make, model, daily_distance, city) VALUES (2, 'Chevrolet', 'Bolt', 38.2, 'Seattle');", "sql": "SELECT AVG(daily_distance) FROM ElectricVehicles WHERE city = 'Seattle';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 315).", "schema": null, "sql": "CREATE OPERATOR <> (\n\tPROCEDURE = isnne,\n\tLEFTARG = ismn,\n\tRIGHTARG = ismn13,\n\tCOMMUTATOR = <>,\n\tNEGATOR = =,\n\tRESTRICT = neqsel,\n\tJOIN = neqjoinsel);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 150, "num_statements": 1}
{"question": "Which team won the most titles in the NFL?", "schema": "CREATE TABLE nfl_titles (team VARCHAR(50), titles INT); INSERT INTO nfl_titles (team, titles) VALUES ('Pittsburgh Steelers', 6), ('New England Patriots', 6), ('Dallas Cowboys', 5);", "sql": "SELECT team, MAX(titles) AS most_titles FROM nfl_titles;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "What is the minimum ocean acidification level in the Pacific Ocean?", "schema": "CREATE TABLE ocean_acidification (location TEXT, value FLOAT); INSERT INTO ocean_acidification (location, value) VALUES ('Pacific Ocean', 8.1), ('Atlantic Ocean', 8.0);", "sql": "SELECT MIN(value) FROM ocean_acidification WHERE location = 'Pacific Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1}
{"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 30).", "schema": null, "sql": "CREATE FUNCTION hs_contains(hstore,hstore)\nRETURNS bool\nAS 'MODULE_PATHNAME','hstore_contains'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1}
{"question": "Calculate the total population of all animals in Australian conservation programs", "schema": "CREATE TABLE conservation_programs (id INT, program_name VARCHAR(255), location VARCHAR(255)); CREATE TABLE animal_populations (id INT, program_id INT, animal_type VARCHAR(255), population INT); INSERT INTO conservation_programs (id, program_name, location) VALUES (1, 'Australian Wildlife Conservancy', 'Australia'), (2, 'Taronga Conservation Society', 'Australia'); INSERT INTO animal_populations (id, program_id, animal_type, population) VALUES (1, 1, 'Kangaroo', 10000), (2, 1, 'Wallaby', 5000), (3, 2, 'Koala', 8000), (4, 2, 'Wombat', 2000);", "sql": "SELECT SUM(animal_populations.population) FROM conservation_programs INNER JOIN animal_populations ON conservation_programs.id = animal_populations.program_id WHERE conservation_programs.location = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1}
{"question": "Delete diversity_metrics records for company_id 102", "schema": "CREATE TABLE diversity_metrics (id INT PRIMARY KEY, company_id INT, gender VARCHAR(50), diversity_score DECIMAL(3,2));", "sql": "DELETE FROM diversity_metrics WHERE company_id = 102;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the date for attendance more than 20,268", "schema": "CREATE TABLE table_name_47 (date VARCHAR, attendance INTEGER)", "sql": "SELECT date FROM table_name_47 WHERE attendance > 20 OFFSET 268;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the lane for notes Q, SB and time less than 11.22?", "schema": "CREATE TABLE table_name_31 (lane INTEGER, notes VARCHAR, time___sec__ VARCHAR)", "sql": "SELECT SUM(lane) FROM table_name_31 WHERE notes = 'q, sb' AND time___sec__ < 11.22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What player has 14 wins?", "schema": "CREATE TABLE table_name_83 (player VARCHAR, wins VARCHAR)", "sql": "SELECT player FROM table_name_83 WHERE wins = 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 9).", "schema": null, "sql": "create table lp_null partition of lp for values in (null);", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What year and the corresponding ceremony was the english titled movie \"time out\" submitted?", "schema": "CREATE TABLE table_22102732_1 (year__ceremony_ VARCHAR, english_title VARCHAR)", "sql": "SELECT year__ceremony_ FROM table_22102732_1 WHERE english_title = 'Time Out';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When dayton is the team what is the record?", "schema": "CREATE TABLE table_29556461_8 (record VARCHAR, team VARCHAR)", "sql": "SELECT record FROM table_29556461_8 WHERE team = 'Dayton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the highest November that has a game less than 12, and @ detroit red wings as the opponent?", "schema": "CREATE TABLE table_name_57 (november INTEGER, game VARCHAR, opponent VARCHAR)", "sql": "SELECT MAX(november) FROM table_name_57 WHERE game < 12 AND opponent = '@ detroit red wings';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1}
{"question": "What percentage of faculty members in the Physics department are female?", "schema": "CREATE TABLE faculty (id INT, faculty_name VARCHAR(255), department VARCHAR(255), gender VARCHAR(255)); INSERT INTO faculty (id, faculty_name, department, gender) VALUES (1, 'Faculty1', 'Physics', 'Male'), (2, 'Faculty2', 'Physics', 'Female'), (3, 'Faculty3', 'Physics', 'Male'), (4, 'Faculty4', 'Mathematics', 'Female'), (5, 'Faculty5', 'Mathematics', 'Male');", "sql": "SELECT (COUNT(*) FILTER (WHERE gender = 'Female')) * 100.0 / COUNT(*) as percentage FROM faculty WHERE department = 'Physics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 1}
{"question": "PostgreSQL regression test 'window': Write the SELECT query (example 75).", "schema": null, "sql": "SELECT * FROM v_window;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM v_window) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 23, "num_statements": 1}
{"question": "What is the average checking account balance in the Boston branch?", "schema": "CREATE TABLE accounts (customer_id INT, account_type VARCHAR(20), branch VARCHAR(20), balance DECIMAL(10,2)); INSERT INTO accounts (customer_id, account_type, branch, balance) VALUES (1, 'Savings', 'New York', 5000.00), (2, 'Checking', 'New York', 7000.00), (3, 'Checking', 'Boston', 8000.00), (4, 'Savings', 'Boston', 4000.00);", "sql": "SELECT AVG(balance) FROM accounts WHERE account_type = 'Checking' AND branch = 'Boston';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "Find the number of sustainable tourism activities in each country.", "schema": "CREATE TABLE SustainableTourismActivities (activity_id INT, activity_name TEXT, country TEXT, local_economic_impact FLOAT); INSERT INTO SustainableTourismActivities (activity_id, activity_name, country, local_economic_impact) VALUES (1, 'Biking Tour', 'Portugal', 12000.0), (2, 'Hiking Adventure', 'Portugal', 15000.0), (3, 'Surfing Lesson', 'Australia', 8000.0);", "sql": "SELECT country, COUNT(*) FROM SustainableTourismActivities GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What primary sponsor has the owner Rick Hendrick and their crew chief is Alan Gustafson?", "schema": "CREATE TABLE table_name_50 (primary_sponsor_s_ VARCHAR, owner_s_ VARCHAR, crew_chief VARCHAR)", "sql": "SELECT primary_sponsor_s_ FROM table_name_50 WHERE owner_s_ = 'rick hendrick' AND crew_chief = 'alan gustafson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1}
{"question": "What is the maximum cargo weight transported by any vessel for each destination?", "schema": "CREATE TABLE cargo_data(id INT, vessel_name VARCHAR(50), destination VARCHAR(50), cargo_weight DECIMAL(5,2)); INSERT INTO cargo_data(id, vessel_name, destination, cargo_weight) VALUES (1, 'Vessel A', 'Port A', 200.0), (2, 'Vessel B', 'Port A', 250.0);", "sql": "SELECT destination, MAX(cargo_weight) FROM cargo_data GROUP BY destination;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What title runs for 3:22?", "schema": "CREATE TABLE table_name_5 (title VARCHAR, time VARCHAR)", "sql": "SELECT title FROM table_name_5 WHERE time = '3:22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What country has a score less than 68, and paul casey as the player?", "schema": "CREATE TABLE table_name_95 (country VARCHAR, score VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_95 WHERE score < 68 AND player = 'paul casey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1}
{"question": "Which programs have the highest average donation in the 'Programs' and 'Donations' tables?", "schema": "CREATE TABLE Programs (ProgramID INT, ProgramName VARCHAR(50)); CREATE TABLE Donations (DonorID INT, ProgramID INT, Amount DECIMAL(10, 2)); INSERT INTO Programs (ProgramID, ProgramName) VALUES (1, 'Youth Mentoring'), (2, 'Food Bank'), (3, 'Climate Action'); INSERT INTO Donations (DonorID, ProgramID, Amount) VALUES (1, 1, 500.00), (2, 2, 250.00), (3, 1, 1000.00), (4, 3, 750.00);", "sql": "SELECT Programs.ProgramName, AVG(Donations.Amount) as AverageDonation FROM Programs INNER JOIN Donations ON Programs.ProgramID = Donations.ProgramID GROUP BY Programs.ProgramID ORDER BY AverageDonation DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which team had a qualifying 2 time of 1:01.093?", "schema": "CREATE TABLE table_name_11 (team VARCHAR, qual_2 VARCHAR)", "sql": "SELECT team FROM table_name_11 WHERE qual_2 = '1:01.093';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the coronie with a 0.7% nickerie?", "schema": "CREATE TABLE table_name_92 (coronie VARCHAR, nickerie VARCHAR)", "sql": "SELECT coronie FROM table_name_92 WHERE nickerie = '0.7%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "What is the total number of healthcare providers in the 'rural_clinic' table who are specialized in cardiology?", "schema": "CREATE TABLE rural_clinic (id INT, name VARCHAR(50), specialty VARCHAR(50)); INSERT INTO rural_clinic (id, name, specialty) VALUES (1, 'John Doe', 'Cardiology'), (2, 'Jane Smith', 'Pediatrics'), (3, 'Michael Brown', 'Cardiology');", "sql": "SELECT COUNT(*) FROM rural_clinic WHERE specialty = 'Cardiology';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "What is the name and capacity of each sports stadium in the state of Florida?", "schema": "CREATE TABLE states (id INT, name VARCHAR(255)); CREATE TABLE stadiums (id INT, state_id INT, name VARCHAR(255), capacity INT);", "sql": "SELECT name, capacity FROM stadiums WHERE state_id = (SELECT id FROM states WHERE name = 'Florida');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1}
{"question": "Which AI system has the lowest explainability rating in the legal domain?", "schema": "CREATE TABLE explainability_domain (ai_system TEXT, domain TEXT, rating FLOAT); INSERT INTO explainability_domain (ai_system, domain, rating) VALUES ('AI Judge', 'Legal', 0.50), ('Legal Analytics Tool', 'Legal', 0.85), ('AI Contract Review', 'Legal', 0.75);", "sql": "SELECT ai_system, MIN(rating) FROM explainability_domain WHERE domain = 'Legal';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1}
{"question": "What is the total number of military technology patents filed by Asian countries?", "schema": "CREATE TABLE MilitaryPatents (id INT PRIMARY KEY, country VARCHAR(50), technology VARCHAR(50), date DATE); INSERT INTO MilitaryPatents (id, country, technology, date) VALUES (1, 'China', 'Stealth Technology', '2020-01-01'), (2, 'Japan', 'Artificial Intelligence', '2019-12-15'), (3, 'South Korea', 'Cyber Security', '2020-03-02');", "sql": "SELECT country, COUNT(*) as total_patents FROM MilitaryPatents WHERE country IN ('China', 'Japan', 'South Korea') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many goals when the points 1 is 38 and the played number is less than 42?", "schema": "CREATE TABLE table_name_24 (goals_for INTEGER, points_1 VARCHAR, played VARCHAR)", "sql": "SELECT SUM(goals_for) FROM table_name_24 WHERE points_1 = 38 AND played < 42;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which current venues location is Mason, Ohio?", "schema": "CREATE TABLE table_14903081_1 (current_venue VARCHAR, location VARCHAR)", "sql": "SELECT current_venue FROM table_14903081_1 WHERE location = 'Mason, Ohio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1}
{"question": "What is the total number of climate mitigation projects in Asia that were completed before 2015?", "schema": "CREATE TABLE climate_mitigation_projects (project_id INT, project_name TEXT, location TEXT, project_type TEXT, start_date DATE, end_date DATE);", "sql": "SELECT COUNT(project_id) FROM climate_mitigation_projects WHERE location LIKE '%Asia%' AND end_date < '2015-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1}
{"question": "Insert a new record for a donation of $500 made by a corporate donor named \"ABC Corp\" on March 15, 2022.", "schema": "CREATE TABLE donations (id INT, donor_id INT, donation_date DATE, amount_donated DECIMAL(10,2)); CREATE TABLE donors (id INT, name TEXT, donor_type TEXT);", "sql": "INSERT INTO donations (id, donor_id, donation_date, amount_donated) VALUES (1, (SELECT id FROM donors WHERE name = 'ABC Corp' AND donor_type = 'Corporate' LIMIT 1), '2022-03-15', 500); INSERT INTO donors (id, name, donor_type) VALUES (1, 'ABC Corp', 'Corporate') ON DUPLICATE KEY UPDATE id = id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 295, "num_statements": 2}
{"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 3).", "schema": null, "sql": "select encrypt('foo', '01234589', 'des');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1}
{"question": "Which drugs were tested in clinical trials and approved, grouped by approval status?", "schema": "CREATE TABLE ClinicalTrials (trial_id INT, drug_name VARCHAR(255), trial_status VARCHAR(255)); INSERT INTO ClinicalTrials (trial_id, drug_name, trial_status) VALUES (1, 'DrugD', 'Completed'), (2, 'DrugD', 'Failed'), (3, 'DrugE', 'Completed'), (4, 'DrugF', 'In Progress'); CREATE TABLE DrugApproval (drug_name VARCHAR(255), approval_date DATE); INSERT INTO DrugApproval (drug_name, approval_date) VALUES ('DrugD', '2021-03-10'), ('DrugE', '2020-11-25');", "sql": "SELECT ct.trial_status, ct.drug_name FROM ClinicalTrials ct JOIN DrugApproval da ON ct.drug_name = da.drug_name GROUP BY ct.trial_status, ct.drug_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 37).", "schema": null, "sql": "SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4));", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Show teacher_id, training_type, and completed_date from the teacher_training table", "schema": "CREATE TABLE teacher_training (id INT PRIMARY KEY, teacher_id INT, training_type VARCHAR(255), completed_date DATE);", "sql": "SELECT teacher_id, training_type, completed_date FROM teacher_training;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the name of the tournament played 13 November 2000?", "schema": "CREATE TABLE table_name_35 (tournament VARCHAR, date VARCHAR)", "sql": "SELECT tournament FROM table_name_35 WHERE date = '13 november 2000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: From what country is the player with a score of 68-71-76=215?", "schema": "CREATE TABLE table_name_70 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_70 WHERE score = 68 - 71 - 76 = 215;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "How many solar energy projects are there in Canada with a commissioned date after 2015?", "schema": "CREATE TABLE solar_energy (project_id INT, project_name VARCHAR(255), country VARCHAR(255), commissioned_date DATE);", "sql": "SELECT COUNT(*) FROM solar_energy WHERE country = 'Canada' AND commissioned_date > '2015-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Visitor played on January 17?", "schema": "CREATE TABLE table_name_67 (visitor VARCHAR, date VARCHAR)", "sql": "SELECT visitor FROM table_name_67 WHERE date = 'january 17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the 54 holes for The Open Championship (4)?", "schema": "CREATE TABLE table_name_7 (championship VARCHAR)", "sql": "SELECT 54 AS _holes FROM table_name_7 WHERE championship = 'the open championship (4)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1}
{"question": "PostgreSQL regression test 'indexing': Write the SELECT query (example 230).", "schema": null, "sql": "select c.relname, pg_get_indexdef(indexrelid)\n from pg_class c join pg_index i on c.oid = i.indexrelid\n where indrelid::regclass::text like 'idxpart%'\n order by indexrelid::regclass::text collate \"C\";", "explanation": "Regression test for Indexing in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select c.relname, pg_get_indexdef(indexrelid)\n from pg_class c join pg_index i on c.oid = i.indexrelid\n where indrelid::regclass::text like 'idxpart%'\n order by indexrelid::regclass::text collate \"C\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 203, "num_statements": 1}
{"question": "What are the names of all satellites launched by India?", "schema": "CREATE TABLE Satellites (Id INT, Name VARCHAR(50), LaunchYear INT, Country VARCHAR(50)); INSERT INTO Satellites (Id, Name, LaunchYear, Country) VALUES (1, 'Sat1', 2018, 'USA'), (2, 'Sat2', 2019, 'USA'), (3, 'Sat3', 2020, 'USA'), (4, 'Sat4', 2020, 'China'), (5, 'Sat5', 2020, 'Russia'), (6, 'Sat6', 2018, 'Germany'), (7, 'Sat7', 2019, 'India'), (8, 'Sat8', 2020, 'India'), (9, 'Sat9', 2020, 'India');", "sql": "SELECT Name FROM Satellites WHERE Country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1}
{"question": "What is the average salary of workers in the manufacturing industry, grouped by their job role and location?", "schema": "CREATE TABLE salaries (worker_id INT, job_role VARCHAR(255), location VARCHAR(255), salary FLOAT);", "sql": "SELECT location, job_role, AVG(salary) FROM salaries GROUP BY location, job_role;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1}
{"question": "What is the average age of astronauts at their first space mission?", "schema": "CREATE TABLE astronauts(id INT, name VARCHAR(50), age INT, first_mission_year INT); INSERT INTO astronauts VALUES(1, 'Yang Liwei', 38, 2003), (2, 'Valentina Tereshkova', 26, 1963);", "sql": "SELECT AVG(age - first_mission_year) FROM astronauts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1}
{"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 131).", "schema": null, "sql": "SELECT (timestamp without time zone 'tomorrow' > 'now') as \"True\";", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT (timestamp without time zone 'tomorrow' > 'now') as \"True\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the high points for record 9-4", "schema": "CREATE TABLE table_22654073_6 (high_points VARCHAR, record VARCHAR)", "sql": "SELECT high_points FROM table_22654073_6 WHERE record = '9-4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the score in the final for runner-up and hard surface with opponents being michaël llodra nenad zimonjić", "schema": "CREATE TABLE table_name_1 (score_in_the_final VARCHAR, opponents_in_the_final VARCHAR, outcome VARCHAR, surface VARCHAR)", "sql": "SELECT score_in_the_final FROM table_name_1 WHERE outcome = 'runner-up' AND surface = 'hard' AND opponents_in_the_final = 'michaël llodra nenad zimonjić';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1}
{"question": "Insert a new product with an ID of 5, a department of 'home_decor', and a price of 14.99.", "schema": "CREATE TABLE products (product_id INT, department VARCHAR(20), price DECIMAL(5,2));", "sql": "INSERT INTO products (product_id, department, price) VALUES (5, 'home_decor', 14.99);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "What is the number of donors and total donation amount for each program?", "schema": "CREATE TABLE donations (donor_id INT, program_id VARCHAR(20), amount DECIMAL(10,2)); INSERT INTO donations (donor_id, program_id, amount) VALUES (1, 'Education', 500.00), (2, 'Health', 300.00), (3, 'Education', 250.00);", "sql": "SELECT program_id, COUNT(DISTINCT donor_id) AS num_donors, SUM(amount) AS total_donations FROM donations GROUP BY program_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many FA cups for the player with under 5 champs, 0 league cups, and over 3 total?", "schema": "CREATE TABLE table_name_13 (fa_cup INTEGER, total VARCHAR, championship VARCHAR, league_cup VARCHAR)", "sql": "SELECT SUM(fa_cup) FROM table_name_13 WHERE championship < 5 AND league_cup = 0 AND total > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1}
{"question": "Show me the cybersecurity policies that were dropped in the last week.", "schema": "CREATE TABLE cybersecurity_policies (id INT, name VARCHAR(50), description TEXT, date DATE); INSERT INTO cybersecurity_policies (id, name, description, date) VALUES (1, 'Incident response policy', 'Outlines the process for responding to security incidents', '2022-04-15'), (2, 'Access control policy', 'Defines who has access to what resources', '2022-05-05'), (3, 'Password policy', 'Defines password complexity and length requirements', '2022-05-12');", "sql": "SELECT * FROM cybersecurity_policies WHERE date >= DATEADD(week, -1, GETDATE()) AND name = 'Password policy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1}
{"question": "Which country has the most exploration projects?", "schema": "CREATE TABLE exploration_projects (project_id INT, project_name VARCHAR(50), country VARCHAR(50)); INSERT INTO exploration_projects (project_id, project_name, country) VALUES (1, 'Project X', 'Brazil'), (2, 'Project Y', 'Nigeria'), (3, 'Project Z', 'Brazil');", "sql": "SELECT country, COUNT(project_id) FROM exploration_projects GROUP BY country ORDER BY COUNT(project_id) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1}
{"question": "PostgreSQL regression test 'misc_functions': Write the SELECT query (example 124).", "schema": null, "sql": "SELECT * FROM pg_split_walfile_name('invalid');", "explanation": "Regression test for Misc Functions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_split_walfile_name('invalid')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the lowest cuts made that had a Top-25 less than 6 and wins greater than 0?", "schema": "CREATE TABLE table_name_63 (cuts_made INTEGER, top_25 VARCHAR, wins VARCHAR)", "sql": "SELECT MIN(cuts_made) FROM table_name_63 WHERE top_25 < 6 AND wins > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "What is the percentage of employees from underrepresented racial or ethnic groups, by department, for the entire company?", "schema": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), race VARCHAR(50)); INSERT INTO employees (id, name, department, race) VALUES (1, 'John Doe', 'IT', 'Caucasian'); INSERT INTO employees (id, name, department, race) VALUES (2, 'Jane Smith', 'HR', 'African American');", "sql": "SELECT department, SUM(CASE WHEN race IN ('African American', 'Hispanic', 'Native American', 'Asian', 'Pacific Islander') THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS pct_underrepresented FROM employees GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 219, "num_statements": 1}
{"question": "PostgreSQL regression test 'hash_func': Write the SELECT query (example 17).", "schema": null, "sql": "SELECT v as value, hash_array(v)::bit(32) as standard,\n hash_array_extended(v, 0)::bit(32) as extended0,\n hash_array_extended(v, 1)::bit(32) as extended1\nFROM (VALUES ('{0}'::int4[]), ('{0,1,2,3,4}'), ('{17,18,19,20}'),\n ('{42,34,65,98}'), ('{550273,590027, 870273}'),\n ('{207112489, 807112489}')) x(v)\nWHERE hash_array(v)::bit(32) != hash_array_extended(v, 0)::bit(32)\n OR hash_array(v)::bit(32) = hash_array_extended(v, 1)::bit(32);", "explanation": "Regression test for Hash Func in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT v as value, hash_array(v)::bit(32) as standard,\n hash_array_extended(v, 0)::bit(32) as extended0,\n hash_array_extended(v, 1)::bit(32) as extended1\nFROM (VALUES ('{0}'::int4[]), ('{0,1,2,3,4}'), ('{17,18,19,20}'),\n ('{42,34,65,98}'), ('{550273,590027, 870273}'),\n ('{207112489, 807112489}')) x(v)\nWHERE hash_array(v)::bit(32) != hash_array_extended(v, 0)::bit(32)\n OR hash_array(v)::bit(32) = hash_array_extended(v, 1)::bit(32)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 469, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: In the tournament that has 15 events, and less than 6 top-25's, how many top-5's did he have?", "schema": "CREATE TABLE table_name_76 (top_5 INTEGER, events VARCHAR, top_25 VARCHAR)", "sql": "SELECT SUM(top_5) FROM table_name_76 WHERE events = 15 AND top_25 < 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the average rank of the province alborz, which had more than 14526 in 1956?", "schema": "CREATE TABLE table_name_95 (rank INTEGER, province VARCHAR)", "sql": "SELECT AVG(rank) FROM table_name_95 WHERE province = 'alborz' AND 1956 > 14526;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1}
{"question": "pgTAP test for Partitions (assertion 38).", "schema": null, "sql": "SELECT * FROM check_test(\n partitions_are( 'public', 'parted', '{part1,part2,hide.part3}'::name[] ),\n true,\n 'partitions_are( sch, tab, parts )',\n 'Table public.parted should have the correct partitions',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Partitions.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When did the king who entered office in 1012 leave office?", "schema": "CREATE TABLE table_name_34 (left_office VARCHAR, entered_office VARCHAR)", "sql": "SELECT left_office FROM table_name_34 WHERE entered_office = '1012';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 475).", "schema": null, "sql": "SELECT '\\x80000000'::bytea::int4 AS \"-2147483648\", '\\x7FFFFFFF'::bytea::int4 AS \"2147483647\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '\\x80000000'::bytea::int4 AS \"-2147483648\", '\\x7FFFFFFF'::bytea::int4 AS \"2147483647\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 11).", "schema": null, "sql": "SELECT * FROM test_timestamp WHERE i<'2004-10-27'::date ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "Obtain threat intelligence metrics related to cyber attacks on defense networks", "schema": "CREATE TABLE threat_intelligence (threat_id INT, threat_source VARCHAR(50), attack_vector VARCHAR(50), network_impact FLOAT, date DATE); INSERT INTO threat_intelligence (threat_id, threat_source, attack_vector, network_impact, date) VALUES (1, 'APT10', 'Phishing', 75, '2020-01-05'); INSERT INTO threat_intelligence (threat_id, threat_source, attack_vector, network_impact, date) VALUES (2, 'APT28', 'Malware', 90, '2020-04-10');", "sql": "SELECT threat_source, attack_vector, network_impact, date FROM threat_intelligence;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Bronze of 2, and a Silver smaller than 0 then what is the sum of the gold?", "schema": "CREATE TABLE table_name_99 (gold INTEGER, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT SUM(gold) FROM table_name_99 WHERE bronze = 2 AND silver < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many bullet tips colors had other features of a blue band on case base?", "schema": "CREATE TABLE table_1036189_1 (bullet_tip_color VARCHAR, other_features VARCHAR)", "sql": "SELECT COUNT(bullet_tip_color) FROM table_1036189_1 WHERE other_features = 'Blue band on case base';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many different jockeys ran on 17 Feb 2007?", "schema": "CREATE TABLE table_14981555_3 (jockey VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(jockey) FROM table_14981555_3 WHERE date = '17 Feb 2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: how many county with per capita income being $20,101", "schema": "CREATE TABLE table_1350350_2 (county VARCHAR, per_capita_income VARCHAR)", "sql": "SELECT COUNT(county) FROM table_1350350_2 WHERE per_capita_income = '$20,101';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1}
{"question": "Which biosensor samples have expression levels higher than 5 for gene 'XYZ'?", "schema": "CREATE SCHEMA if not exists biosensor; USE biosensor; CREATE TABLE if not exists gene_expression (sample_id INT, gene_name VARCHAR(255), expression DECIMAL(5,2)); INSERT INTO gene_expression (sample_id, gene_name, expression) VALUES (1, 'ABC', 3.45), (2, 'ABC', 3.56), (3, 'XYZ', 5.12), (4, 'DEF', 2.98), (5, 'XYZ', 6.25), (6, 'GHI', 4.02);", "sql": "SELECT sample_id, gene_name, expression FROM biosensor.gene_expression WHERE gene_name = 'XYZ' AND expression > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 847).", "schema": null, "sql": "CREATE TABLE tbl1 (c) AS VALUES ('bar'::text);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Gecko value for the item that has a Prince XML value of 'no' and a KHTML value of 'yes'?", "schema": "CREATE TABLE table_name_6 (gecko VARCHAR, prince_xml VARCHAR, khtml VARCHAR)", "sql": "SELECT gecko FROM table_name_6 WHERE prince_xml = 'no' AND khtml = 'yes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the season for runner up of judean rebels", "schema": "CREATE TABLE table_name_56 (season VARCHAR, runner_up VARCHAR)", "sql": "SELECT season FROM table_name_56 WHERE runner_up = 'judean rebels';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who was in lane 6 with a mark of 2:05.58 SB?", "schema": "CREATE TABLE table_name_84 (name VARCHAR, lane VARCHAR, mark VARCHAR)", "sql": "SELECT name FROM table_name_84 WHERE lane = 6 AND mark = '2:05.58 sb';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Runner-up on April 11?", "schema": "CREATE TABLE table_name_36 (runner_up VARCHAR, week VARCHAR)", "sql": "SELECT runner_up FROM table_name_36 WHERE week = 'april 11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who did the Blue Jays play against on April 11?", "schema": "CREATE TABLE table_name_52 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_52 WHERE date = 'april 11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 9).", "schema": null, "sql": "SELECT * FROM brin_revmap_data(get_raw_page('test1_a_idx', 0)) LIMIT 5;", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who is the Winnning driver in which lorenzo bandini has the fastest lap as well as the Pole position?", "schema": "CREATE TABLE table_name_21 (winning_driver VARCHAR, fastest_lap VARCHAR, pole_position VARCHAR)", "sql": "SELECT winning_driver FROM table_name_21 WHERE fastest_lap = 'lorenzo bandini' AND pole_position = 'lorenzo bandini';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the rank of the park that had a value of 5,040,000 in 2010?", "schema": "CREATE TABLE table_name_30 (rank VARCHAR)", "sql": "SELECT rank FROM table_name_30 WHERE 2010 = '5,040,000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "How many public meetings have taken place in the Finance department since 2016?", "schema": "CREATE TABLE meetings (id INT, department TEXT, date DATE); INSERT INTO meetings (id, department, date) VALUES (1, 'Education', '2015-01-01'); INSERT INTO meetings (id, department, date) VALUES (2, 'Education', '2016-01-01'); INSERT INTO meetings (id, department, date) VALUES (3, 'Finance', '2015-01-01');", "sql": "SELECT COUNT(*) FROM meetings WHERE department = 'Finance' AND date >= '2016-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1}
{"question": "How many network infrastructure investments were made in a specific country in the last year?", "schema": "CREATE TABLE network_investments (investment_id INT, investment_date DATE, country VARCHAR(50), investment_amount INT);", "sql": "SELECT country, COUNT(investment_id) FROM network_investments WHERE investment_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1}
{"question": "PostgreSQL regression test 'join': Write the SELECT query (example 36).", "schema": null, "sql": "SELECT tx.ii, tx.jj, tx.kk\n FROM (J1_TBL t1 (a, b, c) CROSS JOIN J2_TBL t2 (d, e))\n AS tx (ii, jj, tt, ii2, kk);", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tx.ii, tx.jj, tx.kk\n FROM (J1_TBL t1 (a, b, c) CROSS JOIN J2_TBL t2 (d, e))\n AS tx (ii, jj, tt, ii2, kk)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1}
{"question": "What is the maximum recorded depth for a marine species habitat?", "schema": "CREATE TABLE marine_species (id INT, name VARCHAR(255), habitat_type VARCHAR(255), average_depth FLOAT); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (1, 'Clownfish', 'Coral Reef', 20.0); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (2, 'Blue Whale', 'Open Ocean', 200.0); CREATE TABLE ocean_depths (location VARCHAR(255), depth FLOAT); INSERT INTO ocean_depths (location, depth) VALUES ('Mariana Trench', 10994.0); INSERT INTO ocean_depths (location, depth) VALUES ('Sierra Leone Rise', 5791.0);", "sql": "SELECT MAX(od.depth) as max_depth FROM marine_species ms JOIN ocean_depths od ON ms.habitat_type = od.location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1}
{"question": "How many geopolitical risk assessments were conducted for Brazil in 2019 and 2020?", "schema": "CREATE TABLE Geopolitical_Risk_Assessments (assessment_id INT, assessment_date DATE, country VARCHAR(50)); INSERT INTO Geopolitical_Risk_Assessments (assessment_id, assessment_date, country) VALUES (1, '2019-05-12', 'Brazil'), (2, '2020-07-03', 'Brazil'), (3, '2021-11-28', 'Brazil');", "sql": "SELECT COUNT(assessment_id) FROM Geopolitical_Risk_Assessments WHERE country = 'Brazil' AND YEAR(assessment_date) IN (2019, 2020);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What country is David Graham from?", "schema": "CREATE TABLE table_name_94 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_94 WHERE player = 'david graham';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the average number of goals conceded where more than 19 goals were scored, the team had 31 points, and more than 7 draws?", "schema": "CREATE TABLE table_name_80 (goals_conceded__gc_ INTEGER, draw__pe_ VARCHAR, goals_scored__gf_ VARCHAR, points__pts_ VARCHAR)", "sql": "SELECT AVG(goals_conceded__gc_) FROM table_name_80 WHERE goals_scored__gf_ > 19 AND points__pts_ = 31 AND draw__pe_ > 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1}
{"question": "Show a SQL definition from the postgrest project (schema, item 49).", "schema": null, "sql": "create function change_role_statement_timeout(timeout text) returns void as $_$\nbegin\n execute format($$\n alter role current_user set statement_timeout = %L;\n $$, timeout);", "explanation": "SQL definition from the open-source postgrest PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 2}
{"question": "Write the DML statement from PostgreSQL regression test 'oid' (example 17).", "schema": null, "sql": "INSERT INTO OID_TBL(f1) VALUES (' - 500');", "explanation": "DML from PostgreSQL core regression test for Oid.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1}
{"question": "Which counties in 'voting_data' table have less than 10,000 registered voters?", "schema": "CREATE TABLE voting_data (county VARCHAR(255), num_voters INT);", "sql": "SELECT county FROM voting_data WHERE num_voters < 10000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'join' (example 808).", "schema": null, "sql": "insert into j2 values(1),(2),(3);", "explanation": "DML from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1}
{"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 181).", "schema": null, "sql": "SELECT * FROM unnest((JSON_QUERY(jsonb '{\"jsa\": [{\"a\": 1, \"b\": [\"foo\"]}, {\"a\": 2, \"c\": {}}, 123]}', '$' RETURNING sqljsonb_rec)).jsa);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM unnest((JSON_QUERY(jsonb '{\"jsa\": [{\"a\": 1, \"b\": [\"foo\"]}, {\"a\": 2, \"c\": {}}, 123]}', '$' RETURNING sqljsonb_rec)).jsa)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 135, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which team was the opponent on December 30?", "schema": "CREATE TABLE table_name_17 (team VARCHAR, date VARCHAR)", "sql": "SELECT team FROM table_name_17 WHERE date = 'december 30';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many times did li ju win the womens singles and wang liqin win the mens singles?", "schema": "CREATE TABLE table_28138035_6 (womens_doubles VARCHAR, womens_singles VARCHAR, mens_singles VARCHAR)", "sql": "SELECT COUNT(womens_doubles) FROM table_28138035_6 WHERE womens_singles = 'Li Ju' AND mens_singles = 'Wang Liqin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1}
{"question": "What is the maximum calorie burn during 'Cardio' workouts for members residing in 'California'?", "schema": "CREATE TABLE Workouts (MemberID INT, State VARCHAR(20), WorkoutType VARCHAR(20), CaloriesBurned INT); INSERT INTO Workouts (MemberID, State, WorkoutType, CaloriesBurned) VALUES (1, 'California', 'Cardio', 300), (2, 'New York', 'Strength', 250), (3, 'California', 'Cardio', 350);", "sql": "SELECT MAX(CaloriesBurned) FROM Workouts WHERE State = 'California' AND WorkoutType = 'Cardio';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: For the game ending with a score of 28-43, what is the listed as the final record?", "schema": "CREATE TABLE table_name_86 (record VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_86 WHERE score = '28-43';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Create a view named \"health_trends\" with columns \"metric_name\", \"latest_value\", and \"one_year_ago_value\". Only include metrics with measurement dates within the last year.", "schema": "CREATE TABLE ocean_health_metrics (metric_id INT PRIMARY KEY, metric_name VARCHAR(255), metric_value FLOAT, measurement_date DATE); INSERT INTO ocean_health_metrics (metric_id, metric_name, metric_value, measurement_date) VALUES (1, 'Dissolved oxygen', 6.5, '2022-01-01'), (2, 'pH', 8.1, '2022-01-02'), (3, 'Temperature', 15.2, '2022-01-03'), (4, 'Salinity', 34.8, '2022-01-04');", "sql": "CREATE VIEW health_trends AS SELECT metric_name, MAX(metric_value) AS latest_value, (SELECT metric_value FROM ocean_health_metrics ohm2 WHERE ohm2.metric_name = ohm.metric_name AND ohm2.measurement_date = DATE_SUB(CURDATE(), INTERVAL 1 YEAR)) AS one_year_ago_value FROM ocean_health_metrics ohm WHERE measurement_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY metric_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 379, "num_statements": 1}
{"question": "What is the total installed capacity (in kW) of renewable energy projects for each city?", "schema": "CREATE TABLE renewable_projects (id INT, project_name VARCHAR(255), city VARCHAR(255), installed_capacity FLOAT);", "sql": "SELECT city, SUM(installed_capacity) FROM renewable_projects GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the sum of gold medals for the United States with silver medal count greater than 3?", "schema": "CREATE TABLE table_name_26 (gold INTEGER, nation VARCHAR, silver VARCHAR)", "sql": "SELECT SUM(gold) FROM table_name_26 WHERE nation = 'united states' AND silver > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "How many dispensaries exist in Colorado with a valid license in 2023?", "schema": "CREATE TABLE dispensaries (id INT, name TEXT, state TEXT, license_expiry DATE); INSERT INTO dispensaries (id, name, state, license_expiry) VALUES (1, 'Dispensary C', 'Colorado', '2023-05-01');", "sql": "SELECT COUNT(*) as num_dispensaries FROM dispensaries WHERE state = 'Colorado' AND license_expiry >= '2023-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1}
{"question": "What is the percentage change in water consumption for households in Tokyo from 2017 to 2018?", "schema": "CREATE TABLE Household_Water_Usage (Household_ID INT, City VARCHAR(20), Year INT, Water_Consumption FLOAT); INSERT INTO Household_Water_Usage (Household_ID, City, Year, Water_Consumption) VALUES (1, 'Tokyo', 2017, 120.5), (2, 'Tokyo', 2018, 110.2);", "sql": "SELECT (SUM(CASE WHEN Year = 2018 THEN Water_Consumption ELSE 0 END) - SUM(CASE WHEN Year = 2017 THEN Water_Consumption ELSE 0 END)) * 100.0 / NULLIF(SUM(CASE WHEN Year = 2017 THEN Water_Consumption ELSE 0 END), 0) AS Percentage_Change FROM Household_Water_Usage WHERE City = 'Tokyo' AND Year IN (2017, 2018);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 309, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What college did the player for the Hamilton Tiger-Cats go to?", "schema": "CREATE TABLE table_21321804_3 (college VARCHAR, cfl_team VARCHAR)", "sql": "SELECT college FROM table_21321804_3 WHERE cfl_team = 'Hamilton Tiger-Cats';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Cover Date of the Story Title Spacehikers (Part 2)?", "schema": "CREATE TABLE table_name_59 (cover_date VARCHAR, story_title VARCHAR)", "sql": "SELECT cover_date FROM table_name_59 WHERE story_title = 'spacehikers (part 2)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 90).", "schema": null, "sql": "CREATE TABLE uaccount (\n pguser name primary key,\n seclv int\n);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1}
{"question": "How many donors have donated to each cause?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName VARCHAR(50), DonationAmount DECIMAL(10,2), CauseID INT);CREATE TABLE Causes (CauseID INT, CauseName VARCHAR(50));", "sql": "SELECT C.CauseName, COUNT(D.DonorID) FROM Donors D JOIN Causes C ON D.CauseID = C.CauseID GROUP BY C.CauseName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many countries were sampled for the index in 2nd place in the LA ranking and 23rd in the world ranking?", "schema": "CREATE TABLE table_19948664_1 (countries_sampled VARCHAR, ranking_la__2_ VARCHAR, world_ranking__1_ VARCHAR)", "sql": "SELECT countries_sampled FROM table_19948664_1 WHERE ranking_la__2_ = '2nd' AND world_ranking__1_ = '23rd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1}
{"question": "What is the number of disaster preparedness workshops held in Houston and their respective attendance?\"", "schema": "CREATE TABLE houston_disaster_preparedness (id INT, workshop_name VARCHAR(255), city VARCHAR(255), attendance INT); INSERT INTO houston_disaster_preparedness (id, workshop_name, city, attendance) VALUES (1, 'Hurricane Preparedness', 'Houston', 30);", "sql": "SELECT workshop_name, SUM(attendance) as total_attendance FROM houston_disaster_preparedness WHERE city = 'Houston' GROUP BY workshop_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1}
{"question": "What is the minimum energy storage capacity of pumped hydro storage plants in Japan?", "schema": "CREATE TABLE pumped_hydro_storage (id INT, name TEXT, country TEXT, capacity FLOAT); INSERT INTO pumped_hydro_storage (id, name, country, capacity) VALUES (1, 'Kannagawa', 'Japan', 245), (2, 'Okuyoshino', 'Japan', 270), (3, 'Shimizu', 'Japan', 300), (4, 'Okutataragi', 'Japan', 336);", "sql": "SELECT MIN(capacity) FROM pumped_hydro_storage WHERE country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "What is the maximum labor practice rating for products in each category, by brand?", "schema": "CREATE TABLE Brands (id INT, brand VARCHAR(255)); INSERT INTO Brands (id, brand) VALUES (1, 'BrandA'), (2, 'BrandB'), (3, 'BrandC'); CREATE TABLE Products (id INT, product VARCHAR(255), category VARCHAR(255), brand_id INT, labor_practice_rating DECIMAL(3, 2)); INSERT INTO Products (id, product, category, brand_id, labor_practice_rating) VALUES (1, 'Product1', 'CategoryA', 1, 4.50), (2, 'Product2', 'CategoryA', 1, 4.75), (3, 'Product3', 'CategoryB', 2, 3.25), (4, 'Product4', 'CategoryB', 2, 3.50), (5, 'Product5', 'CategoryC', 3, 4.00), (6, 'Product6', 'CategoryC', 3, 4.25);", "sql": "SELECT b.brand, p.category, MAX(p.labor_practice_rating) AS max_rating FROM Products p JOIN Brands b ON p.brand_id = b.id GROUP BY b.brand, p.category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1}
{"question": "What is the average years of experience for investigative journalists in the \"investigative_journalists\" table who are from India?", "schema": "CREATE TABLE investigative_journalists (id INT, name VARCHAR(50), country VARCHAR(50), years_of_experience INT); INSERT INTO investigative_journalists (id, name, country, years_of_experience) VALUES (1, 'John Doe', 'USA', 10), (2, 'Jane Smith', 'Canada', 12), (3, 'Pedro Martinez', 'Mexico', 8), (4, 'Rajesh Patel', 'India', 15), (5, 'Anna Lee', 'India', 20);", "sql": "SELECT AVG(years_of_experience) FROM investigative_journalists WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1}
{"question": "What is the average CO2 emission reduction (in metric tons) per green building in the 'green_buildings' and 'carbon_offset' schemas, grouped by city?", "schema": "CREATE TABLE green_buildings.green_buildings_data (city VARCHAR(20), green_building BOOLEAN); CREATE TABLE carbon_offset.offset_initiatives (city VARCHAR(20), co2_reduction_tons INT);", "sql": "SELECT city, AVG(co2_reduction_tons / (SELECT COUNT(*) FROM green_buildings.green_buildings_data WHERE green_building = TRUE)) AS avg_co2_reduction_per_green_building FROM carbon_offset.offset_initiatives WHERE green_building = TRUE GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 247, "num_statements": 1}
{"question": "What is the average amount of socially responsible loans issued by financial institutions in Africa?", "schema": "CREATE TABLE financial_institutions (institution_id INT, institution_name TEXT, region TEXT);CREATE TABLE loans (loan_id INT, institution_id INT, loan_amount DECIMAL, is_socially_responsible BOOLEAN);", "sql": "SELECT AVG(loan_amount) FROM loans JOIN financial_institutions ON loans.institution_id = financial_institutions.institution_id WHERE is_socially_responsible = TRUE AND region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1}
{"question": "What is the average water temperature in January for all salmon farms in Norway?", "schema": "CREATE TABLE salmon_farms (id INT, name TEXT, country TEXT, latitude DECIMAL(9,6), longitude DECIMAL(9,6)); INSERT INTO salmon_farms (id, name, country, latitude, longitude) VALUES (1, 'Farm A', 'Norway', 60.123456, 7.123456), (2, 'Farm B', 'Norway', 62.123456, 8.123456); CREATE TABLE temperature_readings (id INT, farm_id INT, date DATE, temperature DECIMAL(5,2)); INSERT INTO temperature_readings (id, farm_id, date, temperature) VALUES (1, 1, '2022-01-01', 8.5), (2, 1, '2022-01-02', 9.0), (3, 2, '2022-01-01', 7.0), (4, 2, '2022-01-02', 7.5);", "sql": "SELECT AVG(temperature) FROM temperature_readings tr JOIN salmon_farms sf ON tr.farm_id = sf.id WHERE sf.country = 'Norway' AND MONTH(tr.date) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1}
{"question": "What is the average price of cotton textiles sourced from India?", "schema": "CREATE TABLE textile_sourcing (id INT, material VARCHAR(20), country VARCHAR(20), price DECIMAL(5,2)); INSERT INTO textile_sourcing (id, material, country, price) VALUES (1, 'cotton', 'India', 3.50), (2, 'silk', 'China', 15.00), (3, 'wool', 'Australia', 12.00);", "sql": "SELECT AVG(price) FROM textile_sourcing WHERE material = 'cotton' AND country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "How many different types of crops were grown in each region over the past 5 years?", "schema": "CREATE TABLE Crops (date DATE, crop_type VARCHAR(20), region VARCHAR(20));", "sql": "SELECT region, COUNT(DISTINCT crop_type) OVER(PARTITION BY region ORDER BY region ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) as crop_types FROM Crops;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 149, "num_statements": 1}
{"question": "What is the total number of police officers and firefighters in each city district?", "schema": "CREATE TABLE districts (did INT, name VARCHAR(255)); CREATE TABLE police_officers (oid INT, did INT, rank VARCHAR(255)); CREATE TABLE firefighters (fid INT, did INT, rank VARCHAR(255)); INSERT INTO districts VALUES (1, 'Downtown'), (2, 'Uptown'); INSERT INTO police_officers VALUES (1, 1, 'Captain'), (2, 2, 'Lieutenant'); INSERT INTO firefighters VALUES (1, 1, 'Captain'), (2, 2, 'Lieutenant');", "sql": "SELECT d.name, COUNT(po.oid) + COUNT(f.fid) as total_employees FROM districts d LEFT JOIN police_officers po ON d.did = po.did LEFT JOIN firefighters f ON d.did = f.did GROUP BY d.did;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1}
{"question": "What was the average cost of vegetarian menu items?", "schema": "CREATE TABLE menu_items (item VARCHAR(50), type VARCHAR(15), cost DECIMAL(10,2)); INSERT INTO menu_items (item, type, cost) VALUES ('Pizza Margherita', 'Vegetarian', 30.00), ('Caesar Salad', 'Vegetarian', 15.00); CREATE VIEW veg_menu_items AS SELECT item, cost FROM menu_items WHERE type = 'Vegetarian';", "sql": "SELECT AVG(cost) FROM veg_menu_items;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1}
{"question": "pgTAP test for Cmpok (assertion 10).", "schema": null, "sql": "SELECT * FROM check_test(\n isa_ok( ''::text, 'text', 'an empty string' ),\n true,\n 'isa_ok(\"\", text, desc)',\n 'an empty string isa text',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Cmpok.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1}
{"question": "What is the total waste quantity generated and the total number of circular economy initiatives, for each location and material, for the fourth quarter of 2024?", "schema": "CREATE TABLE WasteGeneration (Date date, Location text, Material text, Quantity integer);CREATE TABLE CircularEconomyInitiatives (Location text, Initiative text, StartDate date);", "sql": "SELECT wg.Location, wg.Material, SUM(wg.Quantity) as TotalWasteQuantity, COUNT(DISTINCT cei.Initiative) as NumberOfInitiatives FROM WasteGeneration wg LEFT JOIN CircularEconomyInitiatives cei ON wg.Location = cei.Location WHERE wg.Date >= '2024-10-01' AND wg.Date < '2025-01-01' GROUP BY wg.Location, wg.Material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 313, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the home team on March 7?", "schema": "CREATE TABLE table_name_61 (home VARCHAR, date VARCHAR)", "sql": "SELECT home FROM table_name_61 WHERE date = 'march 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1}
{"question": "What is the maximum number of followers for users from India?", "schema": "CREATE TABLE users (id INT, name VARCHAR(50), country VARCHAR(2), followers INT); INSERT INTO users (id, name, country, followers) VALUES (1, 'Alice', 'US', 1000), (2, 'Bob', 'IN', 2000), (3, 'Charlie', 'CA', 1500);", "sql": "SELECT MAX(users.followers) as max_followers FROM users WHERE users.country = 'IN';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Points have an Opponent of vancouver canucks, and a November smaller than 11?", "schema": "CREATE TABLE table_name_42 (points INTEGER, opponent VARCHAR, november VARCHAR)", "sql": "SELECT AVG(points) FROM table_name_42 WHERE opponent = 'vancouver canucks' AND november < 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the party of the youngest people?", "schema": "CREATE TABLE people (Party VARCHAR, Age VARCHAR)", "sql": "SELECT Party FROM people ORDER BY Age LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which location includes Coast Mountains with a rank less than 18 at Skihist Mountain?", "schema": "CREATE TABLE table_name_41 (location VARCHAR, mountain_peak VARCHAR, mountain_range VARCHAR, rank VARCHAR)", "sql": "SELECT location FROM table_name_41 WHERE mountain_range = 'coast mountains' AND rank < 18 AND mountain_peak = 'skihist mountain';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1}
{"question": "Add a new rural healthcare facility to the 'rural_facilities' table", "schema": "CREATE TABLE rural_facilities (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(50), capacity INT, location VARCHAR(255));", "sql": "INSERT INTO rural_facilities (id, name, type, capacity, location) VALUES (1, 'Rural Health Clinic', 'Clinic', 20, '789 Elm St, Ruralville, CA 93420');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Tell me the date for dinah pfizenmaier anna zaja and winner", "schema": "CREATE TABLE table_name_46 (date VARCHAR, outcome VARCHAR, opponents VARCHAR)", "sql": "SELECT date FROM table_name_46 WHERE outcome = 'winner' AND opponents = 'dinah pfizenmaier anna zaja';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1}
{"question": "What is the total number of space missions launched before 1999?", "schema": "CREATE TABLE Missions (id INT, name VARCHAR(50), launch_year INT); INSERT INTO Missions (id, name, launch_year) VALUES (1, 'Mission1', 2000), (2, 'Mission2', 1999), (3, 'Mission3', 2001);", "sql": "SELECT COUNT(*) FROM Missions WHERE launch_year < 1999;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who won the bronze medal in Hiroshima?", "schema": "CREATE TABLE table_name_90 (bronze VARCHAR, location VARCHAR)", "sql": "SELECT bronze FROM table_name_90 WHERE location = 'hiroshima';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "What is the total number of orders placed by customers in the 'extra-large' size range?", "schema": "CREATE TABLE customer_size(customer_id INT, size VARCHAR(10)); INSERT INTO customer_size(customer_id, size) VALUES(1, 'large'), (2, 'medium'), (3, 'extra-large'), (4, 'small');", "sql": "SELECT COUNT(*) FROM customer_size WHERE size = 'extra-large';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "Delete all records in the 'digital_divide_stats' table where the 'year' is before 2010", "schema": "CREATE TABLE digital_divide_stats (id INT PRIMARY KEY, country VARCHAR(255), year INT, internet_users INT, total_population INT);", "sql": "WITH deleted_data AS (DELETE FROM digital_divide_stats WHERE year < 2010 RETURNING *) SELECT * FROM deleted_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 113, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the President who has a Treasurer of james davidson, and a Season of 2006–2007?", "schema": "CREATE TABLE table_name_26 (president VARCHAR, treasurer VARCHAR, season VARCHAR)", "sql": "SELECT president FROM table_name_26 WHERE treasurer = 'james davidson' AND season = '2006–2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 393).", "schema": null, "sql": "select jsonb_path_query('\"Yes\"', '$.boolean().type()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"Yes\"', '$.boolean().type()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'matview' (example 64).", "schema": null, "sql": "CREATE VIEW mvtest_vt2 AS SELECT moo, 2*moo FROM mvtest_vt1 UNION ALL SELECT moo, 3*moo FROM mvtest_vt1;", "explanation": "DDL from PostgreSQL core regression test for Matview.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1}
{"question": "What is the capacity of landfills that serve cities with a population above 500,000?", "schema": "CREATE TABLE Cities (CityID INT, CityName VARCHAR(50), Population INT, LandfillID INT); INSERT INTO Cities VALUES (1, 'CityA', 600000, 1), (2, 'CityB', 800000, 2), (3, 'CityC', 550000, 3); CREATE TABLE Landfills (LandfillID INT, LandfillName VARCHAR(50), Capacity FLOAT); INSERT INTO Landfills VALUES (1, 'Landfill1', 2000000), (2, 'Landfill2', 3000000), (3, 'Landfill3', 2500000);", "sql": "SELECT LandfillName, Capacity FROM Landfills WHERE LandfillID IN (SELECT Cities.LandfillID FROM Cities WHERE Population > 500000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the L2 cache of the microprocessor with model number pentium dual-core t3400?", "schema": "CREATE TABLE table_name_91 (l2_cache VARCHAR, model_number VARCHAR)", "sql": "SELECT l2_cache FROM table_name_91 WHERE model_number = 'pentium dual-core t3400';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What are the processors supported by a ddr2 memory and the nforce 550 model?", "schema": "CREATE TABLE table_name_91 (processors_supported VARCHAR, memory VARCHAR, model VARCHAR)", "sql": "SELECT processors_supported FROM table_name_91 WHERE memory = 'ddr2' AND model = 'nforce 550';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1}
{"question": "List the unique medical conditions of astronauts who have participated in space missions longer than 30 days.", "schema": "CREATE TABLE Astronaut_Medical (Astronaut_ID INT, Medical_Condition VARCHAR(50), Mission_Duration INT); INSERT INTO Astronaut_Medical (Astronaut_ID, Medical_Condition, Mission_Duration) VALUES (1, 'Motion Sickness', 300); INSERT INTO Astronaut_Medical (Astronaut_ID, Medical_Condition, Mission_Duration) VALUES (2, 'Space Adaptation Syndrome', 35); INSERT INTO Astronaut_Medical (Astronaut_ID, Medical_Condition, Mission_Duration) VALUES (3, 'Back Pain', 250);", "sql": "SELECT DISTINCT Medical_Condition FROM Astronaut_Medical WHERE Mission_Duration > 30;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was Stockholm's score when Malmo scored 2?", "schema": "CREATE TABLE table_name_54 (stockholm VARCHAR, malmö VARCHAR)", "sql": "SELECT stockholm FROM table_name_54 WHERE malmö = '2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1}
{"question": "PL/pgSQL test: Plperl Array (example 4).", "schema": null, "sql": "select plperl_sum_array('{{1,2,3}, {4,5,6}}');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Array.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who owned winner Blueeyesintherein after 2009?", "schema": "CREATE TABLE table_name_75 (owner VARCHAR, year VARCHAR, winner VARCHAR)", "sql": "SELECT owner FROM table_name_75 WHERE year > 2009 AND winner = 'blueeyesintherein';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 56).", "schema": null, "sql": "select jsonb_path_query('1', 'strict $[1]');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('1', 'strict $[1]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1}
{"question": "How many operators were hired in the last month in each factory?", "schema": "CREATE TABLE factories(id INT, name TEXT, location TEXT);CREATE TABLE operators(id INT, factory_id INT, hire_date DATE);INSERT INTO factories(id, name, location) VALUES (1, 'Factory A', 'Location A'), (2, 'Factory B', 'Location B'); INSERT INTO operators(id, factory_id, hire_date) VALUES (1, 1, '2021-04-01'), (2, 1, '2021-05-01'), (3, 2, '2021-03-15');", "sql": "SELECT factory_id, COUNT(*) as new_hires FROM operators WHERE hire_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY factory_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1}
{"question": "Write the PL/pgSQL object from PostgreSQL regression test 'foreign_key' (example 578).", "schema": null, "sql": "\\d fk_notpartitioned_pk\n\n-- Check the existing FK trigger\nSELECT conname, tgrelid::regclass as tgrel, regexp_replace(tgname, '[0-9]+', 'N') as tgname, tgtype\nFROM pg_trigger t JOIN pg_constraint c ON (t.tgconstraint = c.oid)\nWHERE tgrelid IN (SELECT relid FROM pg_partition_tree('fk_partitioned_fk'::regclass)\n\t\t\t\t UNION ALL SELECT 'fk_notpartitioned_pk'::regclass)\nORDER BY tgrelid, tgtype;", "explanation": "PL/pgSQL object from PostgreSQL core test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 392, "num_statements": 1}
{"question": "Which organizations have a location in 'Australia' and are of type 'Government'?", "schema": "CREATE TABLE organizations (id INT PRIMARY KEY, name VARCHAR(50), type VARCHAR(50), location VARCHAR(50)); INSERT INTO organizations (id, name, type, location) VALUES (1, 'Australian Renewable Energy Agency', 'Government', 'Australia'); INSERT INTO organizations (id, name, type, location) VALUES (2, 'Sustainable Energy Authority of Ireland', 'Government', 'Ireland');", "sql": "SELECT organizations.name, organizations.type, organizations.location FROM organizations WHERE organizations.location = 'Australia' AND organizations.type = 'Government';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1}
{"question": "What was the average price of electric vehicle charging in Paris per kWh in Q2 2022?", "schema": "CREATE TABLE EV_Charging_Prices (city VARCHAR(20), quarter INT, year INT, avg_price DECIMAL(5,2)); INSERT INTO EV_Charging_Prices (city, quarter, year, avg_price) VALUES ('Paris', 2, 2022, 0.25), ('Paris', 3, 2022, 0.26), ('Berlin', 2, 2022, 0.28), ('Berlin', 3, 2022, 0.29);", "sql": "SELECT AVG(avg_price) FROM EV_Charging_Prices WHERE city = 'Paris' AND quarter = 2 AND year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the A330 for A310 B10?", "schema": "CREATE TABLE table_name_62 (a330 VARCHAR, a310 VARCHAR)", "sql": "SELECT a330 FROM table_name_62 WHERE a310 = 'b10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1}
{"question": "How many investments were made in total in Q2 2021?", "schema": "CREATE TABLE investments (id INT, region VARCHAR(20), date DATE); INSERT INTO investments (id, region, date) VALUES (1, 'Asia-Pacific', '2021-01-05'), (2, 'Europe', '2021-02-10'), (3, 'Asia-Pacific', '2021-03-25'), (4, 'Africa', '2021-04-15'), (5, 'Europe', '2021-06-01');", "sql": "SELECT COUNT(*) FROM investments WHERE date BETWEEN '2021-04-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the tier IV year for the tournament held in Tampa?", "schema": "CREATE TABLE table_20630462_1 (tier_iv_in VARCHAR, tournament VARCHAR)", "sql": "SELECT tier_iv_in FROM table_20630462_1 WHERE tournament = 'Tampa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "What is the total area of sustainable forests in square kilometers?", "schema": "CREATE TABLE forest (id INT, name TEXT, area_sqkm FLOAT, is_sustainable BOOLEAN);", "sql": "SELECT SUM(area_sqkm) FROM forest WHERE is_sustainable = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "What are the regulatory frameworks for 'Country4' and 'Country5'?", "schema": "CREATE TABLE countries (id INT, name VARCHAR(255)); INSERT INTO countries (id, name) VALUES (1, 'Country1'); INSERT INTO countries (id, name) VALUES (2, 'Country2'); INSERT INTO countries (id, name) VALUES (3, 'Country3'); INSERT INTO countries (id, name) VALUES (4, 'Country4'); INSERT INTO countries (id, name) VALUES (5, 'Country5'); CREATE TABLE regulatory_frameworks (id INT, country_id INT, name VARCHAR(255)); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (1, 1, 'Framework1'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (2, 1, 'Framework2'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (3, 2, 'Framework3'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (4, 4, 'Framework4'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (5, 4, 'Framework5'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (6, 5, 'Framework6');", "sql": "SELECT name FROM regulatory_frameworks WHERE country_id IN (4, 5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "List the banks with the lowest number of financial capability programs offered in Q2 2022, in ascending order?", "schema": "CREATE TABLE FINANCIAL_CAPABILITY_PROGRAMS (BANK_NAME VARCHAR(50), PROGRAM_NAME VARCHAR(50), START_DATE DATE); INSERT INTO FINANCIAL_CAPABILITY_PROGRAMS VALUES ('Bank F', 'Program I', '2022-04-15'); INSERT INTO FINANCIAL_CAPABILITY_PROGRAMS VALUES ('Bank G', 'Program J', '2022-05-20'); INSERT INTO FINANCIAL_CAPABILITY_PROGRAMS VALUES ('Bank F', 'Program K', '2022-06-05'); INSERT INTO FINANCIAL_CAPABILITY_PROGRAMS VALUES ('Bank H', 'Program L', '2022-04-01');", "sql": "SELECT BANK_NAME, COUNT(*) TOTAL_PROGRAMS FROM FINANCIAL_CAPABILITY_PROGRAMS WHERE START_DATE >= '2022-04-01' AND START_DATE < '2022-07-01' GROUP BY BANK_NAME ORDER BY TOTAL_PROGRAMS ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which League showed 7,975 for an average attendance?", "schema": "CREATE TABLE table_name_20 (league VARCHAR, attendance_average VARCHAR)", "sql": "SELECT league FROM table_name_20 WHERE attendance_average = '7,975';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "What is the total number of public transportation users in New York, London, and Paris in 2020?", "schema": "CREATE TABLE CityTransport (city VARCHAR(30), users INT, year INT); INSERT INTO CityTransport (city, users, year) VALUES ('New York', 1000000, 2020), ('London', 1200000, 2020), ('Paris', 1100000, 2020);", "sql": "SELECT SUM(users) FROM CityTransport WHERE city IN ('New York', 'London', 'Paris') AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1}
{"question": "What is the minimum fare for a trolleybus in the 'Delhi' region?", "schema": "CREATE TABLE trolleybuses (id INT, region VARCHAR(20), fare DECIMAL(5,2)); INSERT INTO trolleybuses (id, region, fare) VALUES (1, 'Delhi', 15.00), (2, 'Delhi', 20.00), (3, 'Mumbai', 12.00);", "sql": "SELECT MIN(fare) FROM trolleybuses WHERE region = 'Delhi';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "What is the average number of games played by players from the United States?", "schema": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Age INT, Country VARCHAR(50)); INSERT INTO Players (PlayerID, PlayerName, Age, Country) VALUES (1, 'John Doe', 25, 'USA'); INSERT INTO Players (PlayerID, PlayerName, Age, Country) VALUES (2, 'Jane Smith', 30, 'Canada');", "sql": "SELECT AVG(Age) FROM Players WHERE Country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1}
{"question": "How many matches did each player participate in during the 2020 tennis season?", "schema": "CREATE TABLE tennis (player VARCHAR(255), match_id INT); INSERT INTO tennis (player, match_id) VALUES ('Federer', 1), ('Federer', 2), ('Federer', 3), ('Djokovic', 4), ('Djokovic', 5);", "sql": "SELECT player, COUNT(*) FROM tennis GROUP BY player;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1}
{"question": "What is the average fine for each violation in 'regulatory_compliance' table?", "schema": "CREATE TABLE regulatory_compliance (id INT, dispensary VARCHAR(255), fine FLOAT, violation DATE);", "sql": "SELECT violation, AVG(fine) as avg_fine FROM regulatory_compliance GROUP BY violation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1}
{"question": "List all managers and their respective departments who have not completed diversity and inclusion training.", "schema": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Position VARCHAR(50)); CREATE TABLE DiversityTraining (EmployeeID INT, TrainingID INT, Completed DATE); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Position) VALUES (1, 'John', 'Doe', 'IT', 'Manager'), (2, 'Jane', 'Smith', 'HR', 'Manager'); INSERT INTO DiversityTraining (EmployeeID, TrainingID, Completed) VALUES (1, 1, '2021-06-01');", "sql": "SELECT Employees.FirstName, Employees.LastName, Employees.Department FROM Employees LEFT JOIN DiversityTraining ON Employees.EmployeeID = DiversityTraining.EmployeeID WHERE Employees.Position = 'Manager' AND DiversityTraining.Completed IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 244, "num_statements": 1}
{"question": "Which department has the highest number of security incidents in the 'security_incidents' table?", "schema": "CREATE TABLE security_incidents (id INT, department VARCHAR(50), date DATE);", "sql": "SELECT department, COUNT(*) as incident_count FROM security_incidents GROUP BY department ORDER BY incident_count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1}
{"question": "PostgreSQL regression test 'without_overlaps': Write the SELECT query (example 233).", "schema": null, "sql": "SELECT * FROM tp1 ORDER BY id, valid_at;", "explanation": "Regression test for Without Overlaps in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM tp1 ORDER BY id, valid_at) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1}
{"question": "What are the product names and their ratings for products with a rating greater than 4.5?", "schema": "CREATE TABLE products (product_id INT, product_name TEXT, rating FLOAT); INSERT INTO products (product_id, product_name, rating) VALUES (1, 'Product A', 4.5), (2, 'Product B', 4.2), (3, 'Product C', 4.8);", "sql": "SELECT product_name, rating FROM products WHERE rating > 4.5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 46).", "schema": null, "sql": "SELECT ''::ltree || 'Top.Child1.Child2'::ltree;", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1}
{"question": "What was the average project timeline for sustainable building projects in New York?", "schema": "CREATE TABLE project_timelines (id INT PRIMARY KEY, project_name VARCHAR(255), state VARCHAR(255), start_date DATE, end_date DATE, sustainable VARCHAR(5));", "sql": "SELECT AVG(DATEDIFF(end_date, start_date)) FROM project_timelines WHERE state = 'New York' AND sustainable = 'yes';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1}
{"question": "How many inclusive housing units are in New York and Los Angeles combined?", "schema": "CREATE TABLE inclusive_housing (units INT, city VARCHAR(20));", "sql": "SELECT SUM(units) FROM inclusive_housing WHERE city IN ('New York', 'Los Angeles');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What team had the record asscoiated with the 9th inning?", "schema": "CREATE TABLE table_name_54 (team VARCHAR, inn VARCHAR)", "sql": "SELECT team FROM table_name_54 WHERE inn = '9th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which province has evening gown score of 7.61", "schema": "CREATE TABLE table_15081939_4 (province VARCHAR, evening_gown VARCHAR)", "sql": "SELECT province FROM table_15081939_4 WHERE evening_gown = '7.61';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the average Goals/Games for Rummenigge, Karl-Heinz, with Goals less than 162?", "schema": "CREATE TABLE table_name_66 (games VARCHAR, goals INTEGER, name VARCHAR)", "sql": "SELECT AVG(goals) / games FROM table_name_66 WHERE name = 'rummenigge, karl-heinz' AND goals < 162;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1}
{"question": "Which countries have no renewable energy projects in the renewable_projects table?", "schema": "CREATE TABLE renewable_projects (id INT, project_name VARCHAR(100), country VARCHAR(50)); INSERT INTO renewable_projects (id, project_name, country) VALUES (1, 'Renewable Project 1', 'Germany'), (2, 'Renewable Project 2', 'Sweden');", "sql": "SELECT rp.country FROM renewable_projects rp GROUP BY rp.country HAVING COUNT(*) = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "What is the total quantity of each ingredient used, broken down by day?", "schema": "CREATE TABLE ingredient_usage (ingredient_name VARCHAR(50), sale_date DATE, quantity INT); INSERT INTO ingredient_usage (ingredient_name, sale_date, quantity) VALUES ('Lettuce', '2023-03-01', 200), ('Tomatoes', '2023-03-01', 300), ('Cheese', '2023-03-01', 400), ('Lettuce', '2023-03-02', 250), ('Tomatoes', '2023-03-02', 350), ('Cheese', '2023-03-02', 450);", "sql": "SELECT sale_date, ingredient_name, SUM(quantity) FROM ingredient_usage GROUP BY sale_date, ingredient_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the score of the match that took place in the playoff round?", "schema": "CREATE TABLE table_name_31 (score VARCHAR, round VARCHAR)", "sql": "SELECT score FROM table_name_31 WHERE round = 'playoff';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is Place, when To Par is less than 15, and when Score is 76-72-75-71=294?", "schema": "CREATE TABLE table_name_59 (place VARCHAR, to_par VARCHAR, score VARCHAR)", "sql": "SELECT place FROM table_name_59 WHERE to_par < 15 AND score = 76 - 72 - 75 - 71 = 294;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1}
{"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 406).", "schema": null, "sql": "SELECT to_char('infinity'::interval, 'YYYY');", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char('infinity'::interval, 'YYYY')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1}
{"question": "What is the count of new hires in the last 3 months, by recruiter and source?", "schema": "CREATE TABLE Recruitment (RecruiterID int, RecruiterName varchar(50), CandidateSource varchar(50), NewHire bit, HireDate date); INSERT INTO Recruitment (RecruiterID, RecruiterName, CandidateSource, NewHire, HireDate) VALUES (1, 'Alice', 'Job Board', 1, '2022-01-01'), (2, 'Bob', 'Referral', 1, '2022-02-01'), (3, 'Charlie', 'Career Fair', 0, '2022-03-01');", "sql": "SELECT Recruitment.RecruiterName, Recruitment.CandidateSource, COUNT(CASE WHEN Recruitment.NewHire = 1 AND Recruitment.HireDate >= DATEADD(month, -3, GETDATE()) THEN 1 ELSE NULL END) as Count_of_New_Hires FROM Recruitment GROUP BY Recruitment.RecruiterName, Recruitment.CandidateSource;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 286, "num_statements": 1}
{"question": "Which underwriters have processed fewer than 100 policies in the last month?", "schema": "CREATE TABLE underwriter (underwriter_id INT, name VARCHAR(50)); CREATE TABLE policy (policy_id INT, underwriter_id INT, processing_date DATE);", "sql": "SELECT underwriter.name FROM underwriter LEFT JOIN (SELECT underwriter_id, COUNT(*) as policy_count FROM policy WHERE processing_date >= DATEADD(MONTH, -1, GETDATE()) GROUP BY underwriter_id) AS policy_count ON underwriter.underwriter_id = policy_count.underwriter_id WHERE policy_count.policy_count IS NULL OR policy_count.policy_count < 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 343, "num_statements": 1}
{"question": "PostgreSQL regression test 'timestamp': Write the SELECT query (example 170).", "schema": null, "sql": "select age(timestamp 'infinity', timestamp 'infinity');", "explanation": "Regression test for Timestamp in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select age(timestamp 'infinity', timestamp 'infinity')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Write the PL/pgSQL object from PostgreSQL regression test 'rangefuncs' (example 56).", "schema": null, "sql": "-- function in subselect\nselect * from rngfunc2 where f2 in (select f2 from rngfunct(rngfunc2.rngfuncid) z where z.rngfuncid = rngfunc2.rngfuncid) ORDER BY 1,2;", "explanation": "PL/pgSQL object from PostgreSQL core test for Rangefuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1}
{"question": "What is the total amount of socially responsible loans issued by AltruisticBank in Q1 2021?", "schema": "CREATE TABLE AltruisticBank (id INT, loan_type VARCHAR(20), loan_amount INT, issue_date DATE); INSERT INTO AltruisticBank (id, loan_type, loan_amount, issue_date) VALUES (1, 'Socially Responsible', 7000, '2021-01-05');", "sql": "SELECT SUM(loan_amount) FROM AltruisticBank WHERE loan_type = 'Socially Responsible' AND QUARTER(issue_date) = 1 AND YEAR(issue_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1}
{"question": "What vehicles passed the 'Pedestrian Safety Test' in the SafetyTesting table?", "schema": "CREATE TABLE SafetyTesting (Id INT, Vehicle VARCHAR(50), Test VARCHAR(50), Result VARCHAR(50)); INSERT INTO SafetyTesting (Id, Vehicle, Test, Result) VALUES (1, 'Volvo XC60', 'Frontal Crash Test', 'Passed'), (2, 'Nissan Leaf', 'Pedestrian Safety Test', 'Passed');", "sql": "SELECT Vehicle FROM SafetyTesting WHERE Test = 'Pedestrian Safety Test' AND Result = 'Passed';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 361).", "schema": null, "sql": "select jsonb_populate_record_valid(NULL::jsb_ia, '{\"a\": [1, 2]}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_populate_record_valid(NULL::jsb_ia, '{\"a\": [1, 2]}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Show the id, the account name, and other account details for all accounts by the customer with first name 'Meaghan'.", "schema": "CREATE TABLE Accounts (account_id VARCHAR, date_account_opened VARCHAR, account_name VARCHAR, other_account_details VARCHAR, customer_id VARCHAR); CREATE TABLE Customers (customer_id VARCHAR, customer_first_name VARCHAR)", "sql": "SELECT T1.account_id, T1.date_account_opened, T1.account_name, T1.other_account_details FROM Accounts AS T1 JOIN Customers AS T2 ON T1.customer_id = T2.customer_id WHERE T2.customer_first_name = 'Meaghan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1}
{"question": "How many Arctic fox sightings have been recorded each year for the last 5 years?", "schema": "CREATE TABLE arctic_fox_sightings (id INT, year INT, sightings INT);", "sql": "SELECT year, SUM(sightings) FROM arctic_fox_sightings WHERE year BETWEEN (YEAR(CURRENT_DATE) - 5) AND YEAR(CURRENT_DATE) GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Margin has a Dist (f) larger than 10, and a Race of king george vi & queen elizabeth stakes?", "schema": "CREATE TABLE table_name_33 (margin VARCHAR, dist__f_ VARCHAR, race VARCHAR)", "sql": "SELECT margin FROM table_name_33 WHERE dist__f_ > 10 AND race = 'king george vi & queen elizabeth stakes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1}
{"question": "Insert new record for student 'Sara Lee' with ID 456 and disability 'Physical'", "schema": "CREATE TABLE Students (StudentID INT PRIMARY KEY, Name VARCHAR(50), Disability VARCHAR(20));", "sql": "INSERT INTO Students (StudentID, Name, Disability) VALUES (456, 'Sara Lee', 'Physical');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1}
{"question": "Which mine has the lowest labor productivity?", "schema": "CREATE TABLE mines (mine_id INT, name TEXT, location TEXT, productivity FLOAT); INSERT INTO mines (mine_id, name, location, productivity) VALUES (1, 'ABC Mine', 'USA', 1200), (2, 'DEF Mine', 'Canada', 800);", "sql": "SELECT name, MIN(productivity) FROM mines;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1}
{"question": "What is the average production budget for action movies released between 2000 and 2010?", "schema": "CREATE TABLE movies (id INT, title VARCHAR(100), genre VARCHAR(50), release_year INT, production_budget INT); INSERT INTO movies (id, title, genre, release_year, production_budget) VALUES (1, 'MovieA', 'Action', 2005, 15000000); INSERT INTO movies (id, title, genre, release_year, production_budget) VALUES (2, 'MovieB', 'Action', 2002, 20000000);", "sql": "SELECT AVG(production_budget) FROM movies WHERE genre = 'Action' AND release_year BETWEEN 2000 AND 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1}
{"question": "PL/pgSQL test: Plpython Spi (example 41).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION plan_composite_args() RETURNS test_composite_type AS $$\nplan = plpy.prepare(\"select $1 as c1\", [\"test_composite_type\"])\nres = plpy.execute(plan, [{\"a1\": 3, \"a2\": \"label\"}])\nreturn res[0][\"c1\"]\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Spi.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 243, "num_statements": 1}
{"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 573).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION findfuncs( NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "What is the daily revenue for each menu category in the last 30 days?", "schema": "CREATE TABLE daily_sales (sale_date DATE, menu_category VARCHAR(255), revenue INT);", "sql": "SELECT sale_date, menu_category, SUM(revenue) as daily_revenue FROM daily_sales WHERE sale_date BETWEEN DATEADD(day, -30, GETDATE()) AND GETDATE() GROUP BY sale_date, menu_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1}
{"question": "What is the most common type of crime in Paris, and how many times did it occur?", "schema": "CREATE TABLE crimes (id INT, city VARCHAR(255), date DATE, type VARCHAR(255), description TEXT); INSERT INTO crimes (id, city, date, type, description) VALUES (1, 'Paris', '2022-01-01', 'Theft', 'Bicycle theft'), (2, 'Paris', '2022-02-01', 'Vandalism', 'Graffiti');", "sql": "SELECT type, COUNT(*) FROM crimes WHERE city = 'Paris' GROUP BY type ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many different results are there for the battles?", "schema": "CREATE TABLE battle (RESULT VARCHAR)", "sql": "SELECT COUNT(DISTINCT RESULT) FROM battle;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What location is listed from 2005-2010?", "schema": "CREATE TABLE table_26476336_2 (location VARCHAR, years VARCHAR)", "sql": "SELECT location FROM table_26476336_2 WHERE years = '2005-2010';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "How many players are in each team?", "schema": "CREATE TABLE EsportsTeams (TeamID INT, TeamName TEXT, Location TEXT); CREATE TABLE EsportsPlayers (PlayerID INT, PlayerName TEXT, TeamID INT); INSERT INTO EsportsTeams (TeamID, TeamName, Location) VALUES (1, 'TeamA', 'CityA'), (2, 'TeamB', 'CityB'), (3, 'TeamC', 'CityC'); INSERT INTO EsportsPlayers (PlayerID, PlayerName, TeamID) VALUES (1, 'Player1', 1), (2, 'Player2', 1), (3, 'Player3', 2), (4, 'Player4', 3);", "sql": "SELECT TeamID, COUNT(*) FROM EsportsPlayers GROUP BY TeamID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What european country had more than 14 goals, and 52 538 avg att?", "schema": "CREATE TABLE table_name_83 (europe VARCHAR, goals VARCHAR, avgatt VARCHAR)", "sql": "SELECT europe FROM table_name_83 WHERE goals > 14 AND avgatt = '52 538';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "List the states with the most and fewest number of workplaces that have experienced labor disputes in the past year.", "schema": "CREATE TABLE WorkplaceDisputes (State TEXT, Disputes INT); INSERT INTO WorkplaceDisputes (State, Disputes) VALUES ('California', 150), ('Texas', 100), ('New York', 200), ('Florida', 50);", "sql": "SELECT State, Disputes FROM WorkplaceDisputes WHERE Disputes = (SELECT MAX(Disputes) FROM WorkplaceDisputes) OR Disputes = (SELECT MIN(Disputes) FROM WorkplaceDisputes);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1}
{"question": "How many permits were issued for commercial buildings in Los Angeles between 2018 and 2020?", "schema": "CREATE TABLE building_permits (permit_id INT, building_type VARCHAR(20), city VARCHAR(20), issue_date DATE); INSERT INTO building_permits (permit_id, building_type, city, issue_date) VALUES (4, 'Commercial', 'Los Angeles', '2018-04-01'), (5, 'Residential', 'Los Angeles', '2019-07-15'), (6, 'Commercial', 'Los Angeles', '2020-11-05');", "sql": "SELECT COUNT(*) FROM building_permits WHERE building_type = 'Commercial' AND city = 'Los Angeles' AND issue_date BETWEEN '2018-01-01' AND '2020-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1}
{"question": "What is the minimum and maximum salary for each position in the Mining department?", "schema": "CREATE TABLE Employees(id INT, name VARCHAR(50), department VARCHAR(50), position VARCHAR(50), salary FLOAT, full_time BOOLEAN, gender VARCHAR(50), start_date DATE);", "sql": "SELECT position, MIN(salary) AS Min_Salary, MAX(salary) AS Max_Salary FROM Employees WHERE department = 'Mining' GROUP BY position;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What player has the score of 67-72=139?", "schema": "CREATE TABLE table_name_58 (player VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_58 WHERE score = 67 - 72 = 139;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What rank was the team from Australia?", "schema": "CREATE TABLE table_name_77 (rank VARCHAR, country VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_77 WHERE country = 'australia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Find the digital assets that were launched most recently, along with the country they were launched in, in descending order.", "schema": "CREATE TABLE DigitalAssets (AssetID int, AssetName varchar(50), LaunchDate date); INSERT INTO DigitalAssets (AssetID, AssetName, LaunchDate) VALUES (1, 'Asset1', '2022-01-01'), (2, 'Asset2', '2022-02-01'), (3, 'Asset3', '2022-03-01');", "sql": "SELECT AssetName, Country, LaunchDate FROM (SELECT AssetName, Country, LaunchDate, ROW_NUMBER() OVER (ORDER BY LaunchDate DESC) as Rank FROM DigitalAssets) as RankedAssets WHERE Rank = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 187, "num_statements": 1}
{"question": "List the carbon pricing schemes and their corresponding carbon prices for the year 2021, sorted by carbon price in descending order.", "schema": "CREATE TABLE carbon_pricing (scheme VARCHAR(255), year INT, carbon_price FLOAT); INSERT INTO carbon_pricing (scheme, year, carbon_price) VALUES ('ETS', 2021, 30.56);", "sql": "SELECT * FROM carbon_pricing WHERE year = 2021 ORDER BY carbon_price DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the title of series number 56?", "schema": "CREATE TABLE table_2468961_4 (title VARCHAR, no_in_series VARCHAR)", "sql": "SELECT title FROM table_2468961_4 WHERE no_in_series = 56;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "What is the maximum number of visitors for an exhibition in Chicago in 2020?", "schema": "CREATE TABLE ExhibitionsChicago (id INT, exhibition_name VARCHAR(30), city VARCHAR(20), year INT, visitor_count INT); INSERT INTO ExhibitionsChicago (id, exhibition_name, city, year, visitor_count) VALUES (1, 'Van Gogh in Chicago', 'Chicago', 2020, 100000), (2, 'Impressionism in Motion', 'Chicago', 2020, 85000), (3, 'Ancient Egypt', 'Chicago', 2020, 120000);", "sql": "SELECT exhibition_name, MAX(visitor_count) FROM ExhibitionsChicago WHERE city = 'Chicago' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1}
{"question": "What is the number of military personnel in each branch of the Chinese military?", "schema": "CREATE TABLE MilitaryPersonnel (id INT, name VARCHAR(255), branch VARCHAR(255), personnel_count INT); INSERT INTO MilitaryPersonnel (id, name, branch, personnel_count) VALUES (1, 'Li Wei', 'Ground Forces', 800000), (2, 'Zhang Li', 'Air Force', 450000), (3, 'Wang Xiao', 'Navy', 300000);", "sql": "SELECT branch, personnel_count FROM MilitaryPersonnel WHERE branch IN ('Ground Forces', 'Air Force', 'Navy') GROUP BY branch;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1}
{"question": "What is the total fare collected for each month?", "schema": "CREATE TABLE trip (trip_id INT, fare DECIMAL(10,2), trip_date DATE); INSERT INTO trip (trip_id, fare, trip_date) VALUES (1, 2.00, '2022-01-01'), (2, 3.00, '2022-01-02'), (3, 4.00, '2022-02-01'), (4, 5.00, '2022-02-02');", "sql": "SELECT EXTRACT(MONTH FROM trip_date) AS month, SUM(fare) AS total_fare FROM trip GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1}
{"question": "What is the total number of hours of professional development per instructor per district?", "schema": "CREATE TABLE development_hours (teacher_id INT, district_id INT, hours_developed INT);", "sql": "SELECT d.district_id, t.instructor_id, SUM(d.hours_developed) as total_hours FROM development_hours d INNER JOIN teachers t ON d.teacher_id = t.teacher_id GROUP BY d.district_id, t.instructor_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1}
{"question": "What is the average budget allocated for disability support programs per region, ordered by the highest average budget?", "schema": "CREATE TABLE Disability_Support_Programs (Region VARCHAR(50), Budget NUMERIC(10,2)); INSERT INTO Disability_Support_Programs VALUES ('Northeast', 500000), ('Southeast', 600000), ('Midwest', 400000), ('Southwest', 700000), ('West', 550000);", "sql": "SELECT Region, AVG(Budget) as Avg_Budget FROM Disability_Support_Programs GROUP BY Region ORDER BY Avg_Budget DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: List the opposing team on february 15, 2003.", "schema": "CREATE TABLE table_26360571_2 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_26360571_2 WHERE date = 'February 15, 2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "What is the average carbon offset by project?", "schema": "CREATE TABLE carbon_offsets (project_id INT, carbon_offsets FLOAT); INSERT INTO carbon_offsets (project_id, carbon_offsets) VALUES (1, 1200.5), (2, 1800.75), (3, 2500.33);", "sql": "SELECT AVG(carbon_offsets) FROM carbon_offsets;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1}
{"question": "Calculate the total water usage for all mining operations, per month", "schema": "CREATE TABLE WaterUsage (SiteID INT, UsageDate DATE, AmountUsed INT); INSERT INTO WaterUsage (SiteID, UsageDate, AmountUsed) VALUES (1, '2021-01-01', 500), (1, '2021-01-15', 700);", "sql": "SELECT DATE_FORMAT(UsageDate, '%Y-%m') as Month, SUM(AmountUsed) as TotalWaterUsage FROM WaterUsage GROUP BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the position with height of 2.09", "schema": "CREATE TABLE table_name_10 (position VARCHAR, height VARCHAR)", "sql": "SELECT position FROM table_name_10 WHERE height = 2.09;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which engine did dr ing f porsche kg use with the porsche rsk (f2) chassis?", "schema": "CREATE TABLE table_name_77 (engine VARCHAR, entrant VARCHAR, chassis VARCHAR)", "sql": "SELECT engine FROM table_name_77 WHERE entrant = 'dr ing f porsche kg' AND chassis = 'porsche rsk (f2)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1}
{"question": "PL/pgSQL test: Plpython Trigger (example 116).", "schema": null, "sql": "-- check that using a function as a trigger over two tables works correctly\nCREATE FUNCTION trig1234() RETURNS trigger LANGUAGE plpython3u AS $$\n TD[\"new\"][\"data\"] = '1234'\n return 'MODIFY'\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How much Latitude has a Water (sqmi) smaller than 0?", "schema": "CREATE TABLE table_name_69 (latitude VARCHAR, water__sqmi_ INTEGER)", "sql": "SELECT COUNT(latitude) FROM table_name_69 WHERE water__sqmi_ < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "What is the percentage of policy advocacy efforts focused on mental health in each continent?", "schema": "CREATE TABLE continent (continent_id INT, continent_name VARCHAR(50), country_code VARCHAR(5)); INSERT INTO continent (continent_id, continent_name, country_code) VALUES (1, 'North America', 'USA'), (2, 'Europe', 'FRA'); CREATE TABLE policy_advocacy (policy_id INT, policy_name VARCHAR(50), continent_id INT, focus_area VARCHAR(50)); INSERT INTO policy_advocacy (policy_id, policy_name, continent_id, focus_area) VALUES (1, 'Mental Health Parity Act', 1, 'Mental Health'), (2, 'Accessibility for Ontarians with Disabilities Act', 2, 'Physical Accessibility');", "sql": "SELECT C.continent_name, COUNT(PA.policy_id) * 100.0 / (SELECT COUNT(*) FROM policy_advocacy WHERE focus_area = 'Mental Health') as mental_health_percentage FROM policy_advocacy PA JOIN continent C ON PA.continent_id = C.continent_id WHERE PA.focus_area = 'Mental Health' GROUP BY C.continent_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 298, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When was J.P. Viernes' last performance?", "schema": "CREATE TABLE table_name_26 (last_performance VARCHAR, name VARCHAR)", "sql": "SELECT last_performance FROM table_name_26 WHERE name = 'j.p. viernes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: The golfer Jeff Sluman golfs for what country?", "schema": "CREATE TABLE table_name_13 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_13 WHERE player = 'jeff sluman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who was the runner up before 2007?", "schema": "CREATE TABLE table_name_93 (runner_up VARCHAR, season INTEGER)", "sql": "SELECT runner_up FROM table_name_93 WHERE season < 2007;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: After 1971, what is the Rank with a Height ft (m) of 19.0 477 (145) and less than 35 Floors?", "schema": "CREATE TABLE table_name_90 (rank VARCHAR, height_ft__m_ VARCHAR, year VARCHAR, floors VARCHAR)", "sql": "SELECT rank FROM table_name_90 WHERE year > 1971 AND floors < 35 AND height_ft__m_ = '19.0 477 (145)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1}
{"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 181).", "schema": null, "sql": "SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1);", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Party of republican, and a District of 7th is what elected?", "schema": "CREATE TABLE table_name_81 (elected VARCHAR, party VARCHAR, district VARCHAR)", "sql": "SELECT elected FROM table_name_81 WHERE party = 'republican' AND district = '7th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "What is the maximum max speed of vessels that have a type of 'Passenger'?", "schema": "CREATE TABLE Vessel (vessel_id INT, name VARCHAR(255), type VARCHAR(255), max_speed DECIMAL(5,2)); INSERT INTO Vessel (vessel_id, name, type, max_speed) VALUES (1, 'Test Vessel 1', 'Cargo', 20.5), (2, 'Test Vessel 2', 'Tanker', 15.2), (3, 'Test Vessel 3', 'Passenger', 30.7), (4, 'Test Vessel 4', 'Passenger', 35.6);", "sql": "SELECT MAX(v.max_speed) FROM Vessel v WHERE v.type = 'Passenger';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 1224).", "schema": null, "sql": "SELECT tableoid::regclass, * FROM batch_cp_upd_test ORDER BY 1;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1}
{"question": "How many public schools are there in the capital city?", "schema": "CREATE TABLE cities (city_id INT, city_name VARCHAR(255), capital_city BOOLEAN); INSERT INTO cities (city_id, city_name, capital_city) VALUES (1, 'Capital City', TRUE), (2, 'Second City', FALSE), (3, 'Third Town', FALSE); CREATE TABLE schools (school_id INT, school_name VARCHAR(255), city_id INT); INSERT INTO schools (school_id, school_name, city_id) VALUES (1, 'Capital High', 1), (2, 'North Secondary', 2), (3, 'East Elementary', 3);", "sql": "SELECT COUNT(*) FROM schools WHERE city_id = (SELECT city_id FROM cities WHERE capital_city = TRUE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1}
{"question": "What is the average cargo handling time in hours for the 'handling_events' table?", "schema": "CREATE TABLE handling_events (event_id INT, port_id INT, event_time TIME); INSERT INTO handling_events (event_id, port_id, event_time) VALUES (1, 1, '12:30:00'), (2, 2, '10:00:00'), (3, 3, '14:00:00');", "sql": "SELECT AVG(TIME_TO_SEC(event_time) / 3600) FROM handling_events;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: what is the english translation when the artist is ann christine?", "schema": "CREATE TABLE table_name_10 (english_translation VARCHAR, artist VARCHAR)", "sql": "SELECT english_translation FROM table_name_10 WHERE artist = 'ann christine';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is Assembled, when Summoned is \"6 October 1297\"?", "schema": "CREATE TABLE table_name_55 (assembled VARCHAR, summoned VARCHAR)", "sql": "SELECT assembled FROM table_name_55 WHERE summoned = '6 october 1297';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Find the names of all swimmers, sorted by their 100 meter scores in ascending order.", "schema": "CREATE TABLE swimmer (name VARCHAR, meter_100 VARCHAR)", "sql": "SELECT name FROM swimmer ORDER BY meter_100;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1}
{"question": "PL/pgSQL test: Plperl Array (example 27).", "schema": null, "sql": "select plperl_arrays_inout_l('{{1}, {2}, {3}}');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Array.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 30).", "schema": null, "sql": "SELECT * FROM test_timestamp WHERE i<='infinity'::date ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "What is the average sugar content in organic vegan desserts?", "schema": "CREATE TABLE products (id INT, category TEXT, is_organic BOOLEAN, is_vegan BOOLEAN, sugar_grams FLOAT); INSERT INTO products (id, category, is_organic, is_vegan, sugar_grams) VALUES (1, 'dessert', true, true, 12.5), (2, 'dessert', false, true, 15.0), (3, 'dessert', true, false, 8.0), (4, 'dessert', false, false, 10.0);", "sql": "SELECT AVG(sugar_grams) FROM products WHERE is_organic = true AND is_vegan = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1}
{"question": "Calculate the percentage of time each type of equipment was in use in the past month.", "schema": "CREATE TABLE equipment_usage (id INT, equipment_type VARCHAR(255), usage_duration INT, timestamp DATETIME); INSERT INTO equipment_usage (id, equipment_type, usage_duration, timestamp) VALUES (1, 'Tractor', 120, '2022-01-01 10:00:00');", "sql": "SELECT equipment_type, 100.0 * SUM(usage_duration) / SUM(CASE WHEN equipment_type = 'Tractor' THEN usage_duration ELSE 0 END) as usage_percentage FROM equipment_usage WHERE timestamp >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 MONTH) GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 258, "num_statements": 1}
{"question": "What is the maximum sea level rise recorded in the Atlantic region?", "schema": "CREATE TABLE sea_level_data (id INT, region VARCHAR(50), sea_level_rise DECIMAL); INSERT INTO sea_level_data (id, region, sea_level_rise) VALUES (1, 'Pacific', 0.3); INSERT INTO sea_level_data (id, region, sea_level_rise) VALUES (2, 'Atlantic', 0.5);", "sql": "SELECT MAX(sea_level_rise) FROM sea_level_data WHERE region = 'Atlantic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "What is the total income of clients in the Philippines who are under 35?", "schema": "CREATE TABLE clients (client_id INT, name VARCHAR(100), age INT, country VARCHAR(50), income DECIMAL(10,2)); INSERT INTO clients (client_id, name, age, country, income) VALUES (7, 'Maria Santos', 30, 'Philippines', 35000);", "sql": "SELECT SUM(income) FROM clients WHERE country = 'Philippines' AND age < 35;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: In what week was the away team Auckland?", "schema": "CREATE TABLE table_name_92 (week VARCHAR, away_team VARCHAR)", "sql": "SELECT week FROM table_name_92 WHERE away_team = 'auckland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "pgTAP test for Pgtap--0.94.0--0.95.0 (assertion 42).", "schema": null, "sql": "-- col_is_unique( scheam, table, column )\nCREATE OR REPLACE FUNCTION col_is_unique ( NAME, NAME, NAME )\nRETURNS TEXT AS $$\n SELECT col_is_unique( $1, $2, ARRAY[$3], 'Column ' || quote_ident($2) || '(' || quote_ident($3) || ') should have a unique constraint' );\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.94.0--0.95.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 281, "num_statements": 2}
{"question": "What is the maximum cost of devices for users in urban areas?", "schema": "CREATE TABLE devices (device_id INT, device_cost FLOAT, user_location VARCHAR(10)); INSERT INTO devices VALUES (1, 300, 'rural'), (2, 500, 'urban'), (3, 400, 'rural');", "sql": "SELECT MAX(device_cost) FROM devices WHERE user_location = 'urban';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "Update the climate finance data to reflect the current inflation rates, using the 'inflation_rates' table.", "schema": "CREATE TABLE climate_finance (project VARCHAR(50), country VARCHAR(50), amount FLOAT, date DATE); CREATE TABLE inflation_rates (country VARCHAR(50), rate FLOAT, date DATE); INSERT INTO climate_finance (project, country, amount, date) VALUES ('Green City', 'USA', 5000000, '2020-01-01'); INSERT INTO inflation_rates (country, rate, date) VALUES ('USA', 1.02, '2020-01-01');", "sql": "UPDATE climate_finance SET amount = amount * (SELECT rate FROM inflation_rates WHERE climate_finance.country = inflation_rates.country AND climate_finance.date = inflation_rates.date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1}
{"question": "Select names of all sustainable suppliers", "schema": "CREATE TABLE supplier_info (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), sustainable_practices BOOLEAN); CREATE VIEW sustainable_suppliers AS SELECT * FROM supplier_info WHERE sustainable_practices = TRUE;", "sql": "SELECT name FROM sustainable_suppliers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1}
{"question": "PostgreSQL regression test 'date': Write the SELECT query (example 196).", "schema": null, "sql": "SELECT EXTRACT(TIMEZONE_M FROM DATE '2020-08-11');", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT EXTRACT(TIMEZONE_M FROM DATE '2020-08-11')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which week was the team's bye week?", "schema": "CREATE TABLE table_name_78 (week INTEGER, attendance VARCHAR)", "sql": "SELECT MAX(week) FROM table_name_78 WHERE attendance = 'bye';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which translation was published in 1986?", "schema": "CREATE TABLE table_name_78 (translation VARCHAR, date VARCHAR)", "sql": "SELECT translation FROM table_name_78 WHERE date = 1986;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "What is the total number of 'recycling' facilities in 'Japan'?", "schema": "CREATE TABLE facilities (id INT, name TEXT, type TEXT, location TEXT); INSERT INTO facilities (id, name, type, location) VALUES (1, 'recycling plant', 'recycling', 'Japan'), (2, 'waste treatment plant', 'waste', 'Japan'), (3, 'recycling plant', 'recycling', 'China');", "sql": "SELECT COUNT(*) FROM facilities WHERE type = 'recycling' AND location = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1}
{"question": "What is the total number of shipments for each warehouse, excluding cancelled shipments?", "schema": "CREATE TABLE shipments (shipment_id INT, warehouse_id VARCHAR(5), quantity INT, cancelled BOOLEAN); CREATE TABLE warehouses (warehouse_id VARCHAR(5), city VARCHAR(5), state VARCHAR(3)); INSERT INTO shipments VALUES (1, 'LAX', 200, FALSE), (2, 'NYC', 300, TRUE), (3, 'LAX', 100, FALSE), (4, 'JFK', 50, FALSE); INSERT INTO warehouses VALUES ('LAX', 'Los', ' Angeles'), ('NYC', 'New', ' York'), ('JFK', 'New', ' York');", "sql": "SELECT warehouses.warehouse_id, COUNT(shipments.shipment_id) FROM warehouses LEFT JOIN shipments ON warehouses.warehouse_id = shipments.warehouse_id WHERE NOT shipments.cancelled GROUP BY warehouses.warehouse_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1}
{"question": "Show the total number of vehicles in 'Tokyo', 'Delhi', and 'Sao Paulo'", "schema": "CREATE TABLE public.vehicles (id INT, type VARCHAR(20), city VARCHAR(20)); INSERT INTO public.vehicles (id, type, city) VALUES (1, 'electric_car', 'Tokyo'), (2, 'conventional_car', 'Tokyo'), (3, 'autonomous_bus', 'Delhi'), (4, 'conventional_car', 'Delhi'), (5, 'electric_bus', 'Sao Paulo'), (6, 'conventional_bus', 'Sao Paulo');", "sql": "SELECT SUM(cnt) FROM (SELECT city, COUNT(*) AS cnt FROM public.vehicles WHERE city IN ('Tokyo', 'Delhi', 'Sao Paulo') GROUP BY city) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What event had 1 round and a record of 6-2?", "schema": "CREATE TABLE table_name_9 (event VARCHAR, round VARCHAR, record VARCHAR)", "sql": "SELECT event FROM table_name_9 WHERE round = 1 AND record = '6-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What lane did the swimmer with a time of 52.84 have?", "schema": "CREATE TABLE table_name_43 (lane INTEGER, time VARCHAR)", "sql": "SELECT MAX(lane) FROM table_name_43 WHERE time = 52.84;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the sum of year with the local host Sai?", "schema": "CREATE TABLE table_name_37 (year INTEGER, local_host_s_ VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_37 WHERE local_host_s_ = 'sai';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is Hungary's highest Rank?", "schema": "CREATE TABLE table_name_38 (rank INTEGER, country VARCHAR)", "sql": "SELECT MAX(rank) FROM table_name_38 WHERE country = 'hungary';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "Insert a new record into the customers table", "schema": "CREATE TABLE customers (customer_id INT, first_name VARCHAR(50), last_name VARCHAR(50), email VARCHAR(100), phone_number VARCHAR(15), created_at TIMESTAMP);", "sql": "INSERT INTO customers (customer_id, first_name, last_name, email, phone_number, created_at) VALUES (1001, 'Jamal', 'Lewis', 'jamal.lewis@mail.com', '5551234567', '2022-01-01 10:30:00');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the latest year joined with a Conference championships of 5, and an Institution of university of north carolina?", "schema": "CREATE TABLE table_name_84 (joined INTEGER, conference_championships VARCHAR, institution VARCHAR)", "sql": "SELECT MAX(joined) FROM table_name_84 WHERE conference_championships = 5 AND institution = 'university of north carolina';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1}
{"question": "Show the number of customer complaints regarding mobile and broadband services, pivoted by the service type and month.", "schema": "CREATE TABLE customer_complaints (complaint_id INT, complaint_date DATE, service_type VARCHAR(50)); INSERT INTO customer_complaints (complaint_id, complaint_date, service_type) VALUES (1, '2022-01-01', 'Mobile'), (2, '2022-02-01', 'Broadband'), (3, '2022-01-01', 'Broadband');", "sql": "SELECT EXTRACT(MONTH FROM complaint_date) as month, service_type, COUNT(*) as complaints FROM customer_complaints GROUP BY month, service_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1}
{"question": "Find the maximum number of attendees in virtual tours across Asian countries, in the last 6 months.", "schema": "CREATE TABLE virtual_tours (id INT, location TEXT, attendees INT, tour_date DATE); INSERT INTO virtual_tours (id, location, attendees, tour_date) VALUES (1, 'Tokyo', 25, '2022-01-01'), (2, 'Seoul', 30, '2022-02-10');", "sql": "SELECT MAX(attendees) FROM virtual_tours WHERE location LIKE '%Asia%' AND tour_date >= DATE_SUB(NOW(), INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Result of win, and a Score of 33-22 involved what event?", "schema": "CREATE TABLE table_name_4 (event VARCHAR, result VARCHAR, score VARCHAR)", "sql": "SELECT event FROM table_name_4 WHERE result = 'win' AND score = '33-22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "What is the total number of eco-certified destinations in Oceania?", "schema": "CREATE TABLE destinations (destination_id INT, name VARCHAR(50), country_id INT, is_eco_certified BOOLEAN); INSERT INTO destinations (destination_id, name, country_id, is_eco_certified) VALUES (11, 'Great Barrier Reef', 14, true); INSERT INTO destinations (destination_id, name, country_id, is_eco_certified) VALUES (12, 'Fiordland National Park', 15, true);", "sql": "SELECT COUNT(*) FROM destinations d WHERE d.is_eco_certified = true AND d.country_id IN (SELECT country_id FROM countries WHERE continent = 'Oceania');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1}
{"question": "Update the R&D expenditure for 'DrugB' to $2,500,000 in Q3 2019.", "schema": "CREATE TABLE rd_expenditure (drug_name TEXT, quarter INTEGER, year INTEGER, amount INTEGER); INSERT INTO rd_expenditure (drug_name, quarter, year, amount) VALUES ('DrugB', 3, 2019, 2000000);", "sql": "UPDATE rd_expenditure SET amount = 2500000 WHERE drug_name = 'DrugB' AND quarter = 3 AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1}
{"question": "What is the total number of defense diplomacy events in the diplomacy_2020 table, grouped by region?", "schema": "CREATE TABLE diplomacy_2020 (id INT, region VARCHAR(255), event VARCHAR(255)); INSERT INTO diplomacy_2020 VALUES (1, 'Asia', 'Summit A'), (2, 'Africa', 'Summit B'), (3, 'Asia', 'Summit C');", "sql": "SELECT region, COUNT(*) FROM diplomacy_2020 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: name the team for 36-29 record", "schema": "CREATE TABLE table_27902171_8 (team VARCHAR, record VARCHAR)", "sql": "SELECT team FROM table_27902171_8 WHERE record = '36-29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the country that has a place of T6, a socre of 69-69=138, and where Niclas Fasth played?", "schema": "CREATE TABLE table_name_17 (country VARCHAR, player VARCHAR, place VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_17 WHERE place = 't6' AND score = 69 - 69 = 138 AND player = 'niclas fasth';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1}
{"question": "pgTAP test for Proctap (assertion 72).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_procedure( 'argpubfunc' ),\n true,\n 'isnt_procedure(func)',\n 'Function argpubfunc() should not be a procedure',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Proctap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1}
{"question": "What is the circular economy initiative in the city of London?", "schema": "CREATE TABLE circular_economy (city VARCHAR(255), initiative VARCHAR(255)); INSERT INTO circular_economy (city, initiative) VALUES ('London', 'Waste to Wealth');", "sql": "SELECT initiative FROM circular_economy WHERE city = 'London';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the Rd. Time for October 3, 2009?", "schema": "CREATE TABLE table_name_86 (rd VARCHAR, _time VARCHAR, date VARCHAR)", "sql": "SELECT rd, _time FROM table_name_86 WHERE date = 'october 3, 2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "PostgreSQL Rules: show example 35.", "schema": null, "sql": "INSERT INTO shoelace_log VALUES ( shoelace_data.sl_name, 6, current_user, current_timestamp ) FROM shoelace_data WHERE 6 <> shoelace_data.sl_avail AND shoelace_data.sl_name = 'sl7'; UPDATE shoelace_data SET sl_avail = 6 WHERE sl_name = 'sl7';", "explanation": "Example from PostgreSQL documentation on Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 2}
{"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 227).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (5,3,'3804.41728329466357308584');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "What is the total number of mobile and broadband subscribers for each technology type?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, technology VARCHAR(20)); CREATE TABLE broadband_subscribers (subscriber_id INT, technology VARCHAR(20)); INSERT INTO mobile_subscribers (subscriber_id, technology) VALUES (1, '4G'), (2, '5G'), (3, '3G'); INSERT INTO broadband_subscribers (subscriber_id, technology) VALUES (4, 'Fiber'), (5, 'Cable'), (6, 'DSL');", "sql": "SELECT 'Mobile' as source, technology, COUNT(*) as total FROM mobile_subscribers GROUP BY technology UNION ALL SELECT 'Broadband' as source, technology, COUNT(*) as total FROM broadband_subscribers GROUP BY technology;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1}
{"question": "Show the number of dams in each state", "schema": "CREATE TABLE Dams (id INT, state VARCHAR(50)); INSERT INTO Dams (id, state) VALUES (1, 'California'), (2, 'Texas');", "sql": "SELECT state, COUNT(*) FROM Dams GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the highest numbered event?", "schema": "CREATE TABLE table_30060356_3 (event INTEGER)", "sql": "SELECT MAX(event) FROM table_30060356_3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1}
{"question": "What is the average time taken for cases to be resolved for each ethnicity of judges?", "schema": "CREATE TABLE public.judges (id SERIAL PRIMARY KEY, name VARCHAR(255), age INT, ethnicity VARCHAR(255), appointment_date DATE); CREATE TABLE public.cases (id SERIAL PRIMARY KEY, judge_id INT, case_number VARCHAR(255), case_date DATE, case_type VARCHAR(255), court_location VARCHAR(255));", "sql": "SELECT j.ethnicity, AVG(c.case_date - j.appointment_date) as average_time_to_resolve FROM public.judges j JOIN public.cases c ON j.id = c.judge_id GROUP BY j.ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 168, "num_statements": 1}
{"question": "Identify the agricultural innovation metrics that have the lowest average score in Central America and the Caribbean.", "schema": "CREATE TABLE innovation_metrics (id INT, name TEXT, score INT, region TEXT); INSERT INTO innovation_metrics (id, name, score, region) VALUES (1, 'Soil Monitoring', 7, 'Central America'), (2, 'Irrigation', 6, 'Caribbean'), (3, 'Crop Yield', 8, 'Central America'), (4, 'Livestock Management', 9, 'Caribbean');", "sql": "SELECT name, AVG(score) as avg_score FROM innovation_metrics WHERE region IN ('Central America', 'Caribbean') GROUP BY name ORDER BY avg_score LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1}
{"question": "What was the total donation amount by individuals in Canada in Q1 2021?", "schema": "CREATE TABLE Donations (id INT, donor_name VARCHAR(255), donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO Donations (id, donor_name, donation_amount, donation_date) VALUES (1, 'John Smith', 50.00, '2021-01-10'), (2, 'Emily Johnson', 75.00, '2021-03-15');", "sql": "SELECT SUM(donation_amount) FROM Donations WHERE donor_name NOT LIKE '%org%' AND donation_date BETWEEN '2021-01-01' AND '2021-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1}
{"question": "What is the number of unique donors per country, for countries that have received donations?", "schema": "CREATE TABLE donors (id INT, name TEXT, organization TEXT, country TEXT);CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10,2)); INSERT INTO donors (id, name, organization, country) VALUES (1, 'Donor A', 'Organization 1', 'Country A'), (2, 'Donor B', 'Organization 2', 'Country A'), (3, 'Donor C', 'Organization 3', 'Country B'), (4, 'Donor D', 'Organization 4', 'Country C'); INSERT INTO donations (id, donor_id, amount) VALUES (1, 1, 500.00), (2, 1, 750.00), (3, 2, 300.00), (4, 3, 1000.00), (5, 4, 250.00);", "sql": "SELECT donors.country, COUNT(DISTINCT donors.id) FROM donors INNER JOIN donations ON donors.id = donations.donor_id GROUP BY donors.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Show party names and the number of events for each party.", "schema": "CREATE TABLE party (party_name VARCHAR, party_id VARCHAR); CREATE TABLE party_events (party_id VARCHAR)", "sql": "SELECT T2.party_name, COUNT(*) FROM party_events AS T1 JOIN party AS T2 ON T1.party_id = T2.party_id GROUP BY T1.party_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1}
{"question": "Which classical models were developed for explainable AI?", "schema": "CREATE TABLE Explainable_Models (Model_Type VARCHAR(20), Model_Name VARCHAR(30)); INSERT INTO Explainable_Models (Model_Type, Model_Name) VALUES ('Classical', 'Decision Trees'), ('Classical', 'Logistic Regression');", "sql": "SELECT Model_Name FROM Explainable_Models WHERE Model_Type = 'Classical';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "pgTAP test for Functap (assertion 39).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_function( 'pg_catalog', 'now', 'whatever' ),\n false,\n 'simple schema.func with desc',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 153, "num_statements": 1}
{"question": "Which restorative justice programs have the highest participant satisfaction rates by facilitator?", "schema": "CREATE TABLE participants (participant_id INT, participant_satisfaction INT, program_id INT); CREATE TABLE programs (program_id INT, facilitator_id INT, program_type VARCHAR(255));", "sql": "SELECT facilitator_name, MAX(AVG(participant_satisfaction)) as avg_satisfaction FROM participants JOIN programs ON programs.program_id = participants.program_id JOIN facilitators ON programs.facilitator_id = facilitators.facilitator_id GROUP BY facilitator_name HAVING program_type = 'Restorative Justice';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 306, "num_statements": 1}
{"question": "PL/pgSQL test: Pltcl Queries (example 14).", "schema": null, "sql": "-- test some error cases\ncreate function tcl_error(out a int, out b int) as $$returm 1$$ language pltcl;\nselect tcl_error();\n\ncreate function bad_record(out a text, out b text) as $$return [list a]$$ language pltcl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Queries.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 3}
{"question": "Generate PostgreSQL SQL for: What is the week 12 opponent for the year that had a week 3 opponent of South Florida (3-0)?", "schema": "CREATE TABLE table_name_6 (week_12_nov_16 VARCHAR, week_3_sept_14 VARCHAR)", "sql": "SELECT week_12_nov_16 FROM table_name_6 WHERE week_3_sept_14 = 'south florida (3-0)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1}
{"question": "What is the maximum age of players who have played VR games?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), HasPlayedVR BOOLEAN); INSERT INTO Players (PlayerID, Age, Gender, HasPlayedVR) VALUES (1, 25, 'Male', true), (2, 30, 'Female', false), (3, 22, 'Male', true);", "sql": "SELECT MAX(Age) FROM Players WHERE HasPlayedVR = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1}
{"question": "Delete the 'peacekeeping_view' view", "schema": "CREATE VIEW peacekeeping_view AS SELECT operation_id, name, location FROM peacekeeping_operations", "sql": "DROP VIEW peacekeeping_view;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What player that has a position of middle blocker, a Nationality of Turkey, and shirt no is 8?", "schema": "CREATE TABLE table_name_36 (player VARCHAR, shirt_no VARCHAR, position VARCHAR, nationality VARCHAR)", "sql": "SELECT player FROM table_name_36 WHERE position = 'middle blocker' AND nationality = 'turkey' AND shirt_no = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is Result, when Date is \"June 11, 1994\", and when Venue is \"Miami, United States\"?", "schema": "CREATE TABLE table_name_58 (result VARCHAR, date VARCHAR, venue VARCHAR)", "sql": "SELECT result FROM table_name_58 WHERE date = 'june 11, 1994' AND venue = 'miami, united states';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1}
{"question": "Get the number of volleyball games played in 2022", "schema": "CREATE TABLE volleyball_games (game_date DATE, team1 VARCHAR(255), team2 VARCHAR(255)); INSERT INTO volleyball_games (game_date, team1, team2) VALUES ('2022-01-01', 'Brazil', 'Russia'); INSERT INTO volleyball_games (game_date, team1, team2) VALUES ('2022-01-02', 'Italy', 'USA');", "sql": "SELECT COUNT(*) FROM volleyball_games WHERE YEAR(game_date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which Year is the highest one that has a Bronze of south korea, and a Silver of philippines?", "schema": "CREATE TABLE table_name_27 (year INTEGER, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_27 WHERE bronze = 'south korea' AND silver = 'philippines';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What Away team had an Attendance of 3,395?", "schema": "CREATE TABLE table_name_68 (away_team VARCHAR, attendance VARCHAR)", "sql": "SELECT away_team FROM table_name_68 WHERE attendance = '3,395';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who was the away team playing against collingwood?", "schema": "CREATE TABLE table_name_15 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_15 WHERE home_team = 'collingwood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Phil Mickelson has what To par?", "schema": "CREATE TABLE table_name_47 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_47 WHERE player = 'phil mickelson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What type of surface was played on when the score was 2–6, 6–1, [10–5]?", "schema": "CREATE TABLE table_name_7 (surface VARCHAR, score VARCHAR)", "sql": "SELECT surface FROM table_name_7 WHERE score = '2–6, 6–1, [10–5]';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 386).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION cast_context_is( NAME, NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1}
{"question": "How many customers from each country have an account balance greater than 10000?", "schema": "CREATE TABLE customers (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO customers (id, name, country) VALUES (1, 'John Doe', 'USA'), (2, 'Jane Smith', 'Canada'), (3, 'Marie Lee', 'France'); CREATE TABLE accounts (id INT, customer_id INT, balance DECIMAL(10, 2)); INSERT INTO accounts (id, customer_id, balance) VALUES (1, 1, 12000.00), (2, 1, 18000.00), (3, 2, 6000.00), (4, 3, 1500.00);", "sql": "SELECT customers.country, COUNT(DISTINCT customers.id) FROM customers INNER JOIN accounts ON customers.id = accounts.customer_id WHERE accounts.balance > 10000 GROUP BY customers.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1}
{"question": "Update the price of a menu item", "schema": "CREATE TABLE menu_items (item_id INT, item_name VARCHAR(255), price DECIMAL(5,2));", "sql": "UPDATE menu_items SET price = 15.99 WHERE item_id = 678;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What was the score for the team with a time of 2:44?", "schema": "CREATE TABLE table_name_8 (score VARCHAR, time VARCHAR)", "sql": "SELECT score FROM table_name_8 WHERE time = '2:44';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1}
{"question": "What is the minimum number of accommodations provided, by accommodation type, for each region?", "schema": "CREATE TABLE Accommodations (ID INT PRIMARY KEY, Region VARCHAR(50), AccommodationType VARCHAR(50), Quantity INT); INSERT INTO Accommodations (ID, Region, AccommodationType, Quantity) VALUES (1, 'North America', 'Sign Language Interpretation', 300), (2, 'North America', 'Wheelchair Ramp', 250), (3, 'South America', 'Assistive Listening Devices', 150), (4, 'Asia', 'Mobility Assistance', 200), (5, 'Europe', 'Sign Language Interpretation', 400), (6, 'Africa', 'Wheelchair Ramp', 100);", "sql": "SELECT Region, AccommodationType, MIN(Quantity) as Minimum FROM Accommodations GROUP BY Region, AccommodationType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1}
{"question": "Update the total donation amount for donor 'Pedro Garcia' to $4500.", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, country TEXT, total_donation_amount FLOAT); INSERT INTO donors (donor_id, donor_name, country, total_donation_amount) VALUES (1, 'Juan Rodriguez', 'Mexico', 4000.00), (2, 'Natalia Ivanova', 'Russia', 5000.00), (3, 'Pedro Garcia', 'Brazil', 2500.00);", "sql": "WITH updated_pedro_garcia AS (UPDATE donors SET total_donation_amount = 4500.00 WHERE donor_name = 'Pedro Garcia' AND country = 'Brazil' RETURNING *) SELECT * FROM updated_pedro_garcia;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 185, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the English word for twie?", "schema": "CREATE TABLE table_name_43 (english VARCHAR, twie VARCHAR, Limburgish VARCHAR)", "sql": "SELECT english FROM table_name_43 WHERE DUTCH(Limburgish) = twie;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1}
{"question": "Who are the construction workers in Washington with a salary higher than the average salary?", "schema": "CREATE TABLE construction_workers (id INT, name VARCHAR(50), salary DECIMAL(10, 2), state VARCHAR(10)); INSERT INTO construction_workers (id, name, salary, state) VALUES (1, 'John Doe', 60000, 'Washington'); INSERT INTO construction_workers (id, name, salary, state) VALUES (2, 'Jane Smith', 55000, 'Washington');", "sql": "SELECT * FROM construction_workers WHERE state = 'Washington' AND salary > (SELECT AVG(salary) FROM construction_workers WHERE state = 'Washington');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1}
{"question": "What is the minimum depth of marine protected areas in the Arctic Ocean region?", "schema": "CREATE TABLE arctic_marine_protected_areas (id INT, name TEXT, region TEXT, min_depth FLOAT); INSERT INTO arctic_marine_protected_areas (id, name, region, min_depth) VALUES (1, 'Norwegian Trench', 'Arctic', 3000.0), (2, 'Fram Strait', 'Arctic', 2500.0);", "sql": "SELECT MIN(min_depth) FROM arctic_marine_protected_areas WHERE region = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1}
{"question": "Which regions have the highest and lowest donation amounts?", "schema": "CREATE TABLE Donations (id INT, region VARCHAR(20), amount FLOAT); INSERT INTO Donations (id, region, amount) VALUES (1, 'Northeast', 25000.00), (2, 'Southeast', 30000.00), (3, 'Midwest', 20000.00), (4, 'Southwest', 15000.00), (5, 'Northwest', 35000.00), (6, 'Asia', 40000.00), (7, 'Africa', 10000.00), (8, 'Europe', 27000.00), (9, 'South America', 22000.00), (10, 'Australia', 32000.00);", "sql": "SELECT region, amount FROM Donations ORDER BY amount DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the name of the Representative Work in a year later than 2005 with a Result of nominated, and an Award of best variety show host?", "schema": "CREATE TABLE table_name_20 (representative_work VARCHAR, award VARCHAR, year VARCHAR, result VARCHAR)", "sql": "SELECT representative_work FROM table_name_20 WHERE year > 2005 AND result = 'nominated' AND award = 'best variety show host';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'pg_stat_statements' (example 43).", "schema": null, "sql": "SELECT * FROM test_squash WHERE id = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n AND data = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);", "explanation": "Example query from the 'pg_stat_statements' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1}
{"question": "Insert new records into the museum_operations table for a new exhibit.", "schema": "CREATE TABLE museum_operations (exhibit_id INT, exhibit_name TEXT, start_date DATE, end_date DATE, daily_visitors INT);", "sql": "INSERT INTO museum_operations (exhibit_id, exhibit_name, start_date, end_date, daily_visitors) VALUES (1001, 'Contemporary Art from Japan', '2023-03-01', '2023-05-31', 500);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'box' (example 8).", "schema": null, "sql": "INSERT INTO BOX_TBL (f1) VALUES ('[1, 2, 3, 4)');", "explanation": "DML from PostgreSQL core regression test for Box.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1}
{"question": "Update the temperature records for Svalbard with the new measurements.", "schema": "CREATE TABLE arctic_regions (region_id INT, region_name VARCHAR(50)); CREATE TABLE weather (weather_id INT, region_id INT, measurement_date DATE, temperature DECIMAL(5,2)); INSERT INTO arctic_regions (region_id, region_name) VALUES (1, 'Alaska'), (2, 'Greenland'), (3, 'Svalbard'); INSERT INTO weather (weather_id, region_id, measurement_date, temperature) VALUES (1, 1, '2017-01-01', -10.5), (2, 1, '2017-12-31', 15.2), (3, 2, '2017-01-01', -25.6), (4, 2, '2017-12-31', -5.2), (5, 3, '2017-01-01', -12.2);", "sql": "UPDATE weather SET temperature = -3.0 WHERE region_id = (SELECT region_id FROM arctic_regions WHERE region_name = 'Svalbard') AND measurement_date = '2017-01-01'; UPDATE weather SET temperature = -4.2 WHERE region_id = (SELECT region_id FROM arctic_regions WHERE region_name = 'Svalbard') AND measurement_date = '2017-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 325, "num_statements": 2}
{"question": "List all military technologies that were used in the last 2 military conflicts, including the technology type and conflict date.", "schema": "CREATE TABLE military_tech_usage (id INT, tech_type TEXT, tech_usage_date DATE, conflict TEXT); INSERT INTO military_tech_usage (id, tech_type, tech_usage_date, conflict) VALUES (1, 'Drones', '2020-02-01', 'Conflict A'), (2, 'Armored Vehicles', '2019-11-15', 'Conflict B');", "sql": "SELECT mt.tech_type, mt.tech_usage_date FROM military_tech_usage mt WHERE mt.tech_usage_date >= '2019-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What country has a loan as the type?", "schema": "CREATE TABLE table_name_6 (country VARCHAR, type VARCHAR)", "sql": "SELECT country FROM table_name_6 WHERE type = 'loan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many players had a best winning average of 20?", "schema": "CREATE TABLE table_27533947_1 (games_won VARCHAR, best_winning_average VARCHAR)", "sql": "SELECT COUNT(games_won) FROM table_27533947_1 WHERE best_winning_average = '20';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Date of Tie no 3?", "schema": "CREATE TABLE table_name_67 (date VARCHAR, tie_no VARCHAR)", "sql": "SELECT date FROM table_name_67 WHERE tie_no = '3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1}
{"question": "Determine the monthly sales growth of eco-friendly makeup products in the last year.", "schema": "CREATE TABLE MakeupSales (ProductID INT, ProductType VARCHAR(20), IsEcoFriendly BOOLEAN, Revenue DECIMAL(10,2), SaleDate DATE); INSERT INTO MakeupSales (ProductID, ProductType, IsEcoFriendly, Revenue, SaleDate) VALUES (1, 'Lipstick', TRUE, 50.00, '2022-01-15'); INSERT INTO MakeupSales (ProductID, ProductType, IsEcoFriendly, Revenue, SaleDate) VALUES (2, 'Eyeshadow', TRUE, 75.00, '2022-02-20'); INSERT INTO MakeupSales (ProductID, ProductType, IsEcoFriendly, Revenue, SaleDate) VALUES (3, 'Foundation', TRUE, 60.00, '2022-03-05'); INSERT INTO MakeupSales (ProductID, ProductType, IsEcoFriendly, Revenue, SaleDate) VALUES (4, 'Blush', TRUE, 80.00, '2022-04-10');", "sql": "SELECT EXTRACT(MONTH FROM SaleDate) AS Month, AVG(Revenue) AS AverageRevenue, LAG(AVG(Revenue)) OVER (ORDER BY EXTRACT(MONTH FROM SaleDate)) AS PreviousMonthAverage FROM MakeupSales WHERE ProductType = 'Makeup' AND IsEcoFriendly = TRUE GROUP BY EXTRACT(MONTH FROM SaleDate) ORDER BY EXTRACT(MONTH FROM SaleDate);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 312, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What season had an acquisition of free agency, and was higher than 9?", "schema": "CREATE TABLE table_name_89 (season VARCHAR, acquisition_via VARCHAR, number VARCHAR)", "sql": "SELECT season FROM table_name_89 WHERE acquisition_via = 'free agency' AND number > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1}
{"question": "What is the total number of factories in the workforce development sector that have implemented workforce diversity initiatives and have a workforce size above 500?", "schema": "CREATE TABLE factories (factory_id INT, sector VARCHAR(255), has_workforce_diversity_initiatives BOOLEAN, workforce_size INT); INSERT INTO factories (factory_id, sector, has_workforce_diversity_initiatives, workforce_size) VALUES (1, 'Workforce Development', TRUE, 600), (2, 'Workforce Development', TRUE, 400), (3, 'Workforce Development', FALSE, 500);", "sql": "SELECT COUNT(*) FROM factories WHERE sector = 'Workforce Development' AND has_workforce_diversity_initiatives = TRUE AND workforce_size > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1}
{"question": "Write the DDL statement from PostgreSQL regression test 'eager_aggregate' (example 3).", "schema": null, "sql": "CREATE TABLE eager_agg_t3 (a int, b int, c double precision);", "explanation": "DDL from PostgreSQL core regression test for Eager Aggregate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1}
{"question": "What was the average price per gram of cannabis flower sold by each dispensary in the city of Toronto in the month of February 2022?", "schema": "CREATE TABLE Dispensaries (id INT, name VARCHAR(255), city VARCHAR(255), state VARCHAR(255));CREATE TABLE Inventory (id INT, dispensary_id INT, price DECIMAL(10, 2), product_type VARCHAR(255), grams INT, month INT, year INT);INSERT INTO Dispensaries (id, name, city, state) VALUES (1, 'CannaCorp', 'Toronto', 'ON');INSERT INTO Inventory (id, dispensary_id, price, product_type, grams, month, year) VALUES (1, 1, 20, 'flower', 3.5, 2, 2022);", "sql": "SELECT d.name, AVG(i.price/i.grams) as avg_price_per_gram FROM Dispensaries d JOIN Inventory i ON d.id = i.dispensary_id WHERE d.city = 'Toronto' AND i.product_type = 'flower' AND i.month = 2 AND i.year = 2022 GROUP BY d.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1}
{"question": "What is the minimum travel time for an autonomous ride-hailing service in Singapore?", "schema": "CREATE TABLE autonomous_ride_hailing (ride_id INT, travel_time FLOAT, city VARCHAR(50));", "sql": "SELECT MIN(travel_time) FROM autonomous_ride_hailing WHERE city = 'Singapore';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1}
{"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 26).", "schema": null, "sql": "-- create the operator class for gist\nCREATE OPERATOR CLASS gist_trgm_ops\nFOR TYPE text USING gist\nAS\n OPERATOR 1 % (text, text),\n FUNCTION 1 gtrgm_consistent (internal, text, smallint, oid, internal),\n FUNCTION 2 gtrgm_union (internal, internal),\n FUNCTION 3 gtrgm_compress (internal),\n FUNCTION 4 gtrgm_decompress (internal),\n FUNCTION 5 gtrgm_penalty (internal, internal, internal),\n FUNCTION 6 gtrgm_picksplit (internal, internal),\n FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),\n STORAGE gtrgm;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 675, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 39).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('1997-02-10 17:32:01-0800');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "What is the total revenue for each restaurant, including the sum of sales for all menu items and additional charges?", "schema": "CREATE TABLE Restaurants (RestaurantID int, RestaurantName varchar(255)); CREATE TABLE MenuItems (MenuID int, MenuName varchar(255), RestaurantID int, Sales int); CREATE TABLE AdditionalCharges (ChargeID int, ChargeName varchar(255), RestaurantID int, ChargeAmt int);", "sql": "SELECT R.RestaurantName, SUM(M.Sales + AC.ChargeAmt) as TotalRevenue FROM Restaurants R INNER JOIN MenuItems M ON R.RestaurantID = M.RestaurantID INNER JOIN AdditionalCharges AC ON R.RestaurantID = AC.RestaurantID GROUP BY R.RestaurantName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1}
{"question": "What is the average budget allocated per public service in the education sector?", "schema": "CREATE TABLE EducationBudget (ID INT, Service VARCHAR(255), Budget INT); INSERT INTO EducationBudget (ID, Service, Budget) VALUES (1, 'Primary Education', 5000000), (2, 'Secondary Education', 6000000), (3, 'Tertiary Education', 8000000);", "sql": "SELECT AVG(Budget) FROM EducationBudget WHERE Service LIKE 'Education%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many parties is the incumbent Bob Brady a member of?", "schema": "CREATE TABLE table_1341423_38 (party VARCHAR, incumbent VARCHAR)", "sql": "SELECT COUNT(party) FROM table_1341423_38 WHERE incumbent = 'Bob Brady';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1}
{"question": "What is the change in monthly water usage for each household in Austin from 2020 to 2021?", "schema": "CREATE TABLE Household_Water_Usage (ID INT, Household VARCHAR(20), Year INT, Month INT, Usage FLOAT);", "sql": "SELECT h20_21.Household, AVG(h20_21.Usage - h20_20.Usage) as Avg_Change FROM Household_Water_Usage h20_21, Household_Water_Usage h20_20 WHERE h20_21.Household = h20_20.Household AND h20_20.Year = 2020 AND h20_21.Year = 2021 GROUP BY h20_21.Household;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1}
{"question": "Insert a new artifact 'Roman Coin' with ArtifactID 4, type 'Coin', quantity 20, and belonging to site 'Pompeii' (SiteID 3).", "schema": "CREATE TABLE ExcavationSites (SiteID INT, SiteName TEXT, Country TEXT); CREATE TABLE Artifacts (ArtifactID INT, SiteID INT, ArtifactName TEXT, ArtifactType TEXT, Quantity INT);", "sql": "INSERT INTO Artifacts (ArtifactID, SiteID, ArtifactName, ArtifactType, Quantity) VALUES (4, 3, 'Roman Coin', 'Coin', 20);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1}
{"question": "Present the types of military equipment from the USA", "schema": "CREATE TABLE military_equipment (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), country VARCHAR(255));", "sql": "SELECT type FROM military_equipment WHERE country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: How many ends were won where the blank ends are smaller than 3.0?", "schema": "CREATE TABLE table_25718552_2 (Ends VARCHAR, blank_ends INTEGER)", "sql": "SELECT Ends AS won FROM table_25718552_2 WHERE blank_ends < 3.0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who was the opponent when the record was 11-6-2?", "schema": "CREATE TABLE table_name_25 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_name_25 WHERE record = '11-6-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the Branding with a Callsign DWLL?", "schema": "CREATE TABLE table_name_99 (branding VARCHAR, callsign VARCHAR)", "sql": "SELECT branding FROM table_name_99 WHERE callsign = 'dwll';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "What are the crops planted by farmers aged 40 or older?", "schema": "CREATE TABLE farmers (id INT PRIMARY KEY, name VARCHAR(50), age INT, location VARCHAR(50)); INSERT INTO farmers (id, name, age, location) VALUES (1, 'John Doe', 35, 'New York'); INSERT INTO farmers (id, name, age, location) VALUES (2, 'Jane Smith', 40, 'Los Angeles'); CREATE TABLE crops (id INT PRIMARY KEY, name VARCHAR(50), growth_season VARCHAR(50), planted_by INT, FOREIGN KEY (planted_by) REFERENCES farmers(id)); INSERT INTO crops (id, name, growth_season, planted_by) VALUES (1, 'Corn', 'Summer', 1); INSERT INTO crops (id, name, growth_season, planted_by) VALUES (2, 'Carrots', 'Winter', 2);", "sql": "SELECT crops.name FROM crops INNER JOIN farmers ON crops.planted_by = farmers.id WHERE farmers.age >= 40;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: When vp8 ( webm ) is 4.4, how much is vp9 ( webm )", "schema": "CREATE TABLE table_26099252_1 (vp9___webm__ VARCHAR, vp8___webm__ VARCHAR)", "sql": "SELECT vp9___webm__ FROM table_26099252_1 WHERE vp8___webm__ = '4.4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1}
{"question": "What's the minimum donation amount made by donors from California in the year 2021?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName VARCHAR(100), DonationAmount DECIMAL(10,2), DonationDate DATE, DonorState VARCHAR(50));", "sql": "SELECT MIN(DonationAmount) FROM Donors WHERE DonorState = 'California' AND YEAR(DonationDate) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Show the station name with at least two trains.", "schema": "CREATE TABLE station (name VARCHAR, station_id VARCHAR); CREATE TABLE train_station (station_id VARCHAR)", "sql": "SELECT T2.name FROM train_station AS T1 JOIN station AS T2 ON T1.station_id = T2.station_id GROUP BY T1.station_id HAVING COUNT(*) >= 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'dblink' (example 24).", "schema": null, "sql": "INSERT INTO foo_1 VALUES (0,'a','{\"a0\",\"b0\",\"c0\"}');", "explanation": "Example query from the 'dblink' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 234).", "schema": null, "sql": "select '[{\"b\": \"c\"}, {\"b\": \"cc\"}]'::jsonb #> array['1','b'];", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '[{\"b\": \"c\"}, {\"b\": \"cc\"}]'::jsonb #> array['1','b']) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 60, "num_statements": 1}
{"question": "Count the number of unique esports events where at least one player from Asia participated, and the number of unique FPS games played in these events.", "schema": "CREATE TABLE EsportsEvents (EventID INT, EventName VARCHAR(50)); CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), Region VARCHAR(20)); CREATE TABLE PlayerEvent (PlayerID INT, EventID INT); CREATE TABLE Games (GameID INT, GameName VARCHAR(50), Genre VARCHAR(20)); CREATE TABLE GameEvent (GameID INT, EventID INT, GameType VARCHAR(10)); CREATE TABLE VR_Games (GameID INT, IsVR INT);", "sql": "SELECT COUNT(DISTINCT EsportsEvents.EventID), COUNT(DISTINCT Games.GameID) FROM EsportsEvents INNER JOIN PlayerEvent ON EsportsEvents.EventID = PlayerEvent.EventID INNER JOIN Players ON PlayerEvent.PlayerID = Players.PlayerID INNER JOIN Games ON GameEvent.GameID = Games.GameID INNER JOIN GameEvent ON EsportsEvents.EventID = GameEvent.EventID INNER JOIN VR_Games ON Games.GameID = VR_Games.GameID WHERE Players.Region = 'Asia' AND Games.Genre = 'FPS';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 452, "num_statements": 1}
{"question": "Delete all artifacts related to a specific excavation site", "schema": "CREATE TABLE ExcavationSites (SiteID int, Name varchar(50), Country varchar(50), StartDate date); INSERT INTO ExcavationSites (SiteID, Name, Country, StartDate) VALUES (5, 'Site E', 'Egypt', '2013-11-11'); CREATE TABLE Artifacts (ArtifactID int, SiteID int, Name varchar(50), Description text, DateFound date); INSERT INTO Artifacts (ArtifactID, SiteID, Name, Description, DateFound) VALUES (4, 5, 'Artifact W', 'An Egyptian artifact', '2017-07-07');", "sql": "DELETE FROM Artifacts WHERE SiteID = 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1}
{"question": "Number of patients who did not show improvement after dialectical behavior therapy (DBT) treatment in the USA.", "schema": "CREATE TABLE patients (patient_id INT, country VARCHAR(50)); INSERT INTO patients (patient_id, country) VALUES (1, 'USA'), (2, 'Canada'), (3, 'USA'); CREATE TABLE treatments (patient_id INT, treatment VARCHAR(10), improvement BOOLEAN); INSERT INTO treatments (patient_id, treatment, improvement) VALUES (1, 'DBT', FALSE), (2, 'DBT', TRUE), (3, 'CBT', TRUE);", "sql": "SELECT COUNT(patients.patient_id) FROM patients INNER JOIN treatments ON patients.patient_id = treatments.patient_id WHERE treatments.treatment = 'DBT' AND patients.country = 'USA' AND treatments.improvement = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Which season did the Minnesota Kicks lose 13 games and scored 156 points?", "schema": "CREATE TABLE table_name_34 (season VARCHAR, lost VARCHAR, points VARCHAR)", "sql": "SELECT COUNT(season) FROM table_name_34 WHERE lost = 13 AND points = 156;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1}
{"question": "List all historical sites in Spain with over 1000 virtual tours, ordered by the number of virtual tours in descending order.", "schema": "CREATE TABLE historical_sites(site_id INT, site_name TEXT, country TEXT, num_virtual_tours INT); INSERT INTO historical_sites(site_id, site_name, country, num_virtual_tours) VALUES (1, 'Alhambra', 'Spain', 1500), (2, 'Sagrada Familia', 'Spain', 1200), (3, 'Mosque of Cordoba', 'Spain', 800);", "sql": "SELECT site_id, site_name, num_virtual_tours FROM historical_sites WHERE country = 'Spain' AND num_virtual_tours > 1000 ORDER BY num_virtual_tours DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1}
{"question": "Find the percentage of visitors that engaged with online exhibitions from North America and Europe combined.", "schema": "CREATE TABLE Online_Interaction (id INT, user_id INT, interaction_date DATE, country VARCHAR(50)); INSERT INTO Online_Interaction (id, user_id, interaction_date, country) VALUES (1, 1, '2022-05-01', 'USA'), (2, 3, '2022-05-15', 'Canada'), (3, 5, '2022-04-20', 'France'), (4, 7, '2022-03-25', 'UK');", "sql": "SELECT (COUNT(DISTINCT CASE WHEN country IN ('USA', 'Canada', 'France', 'UK') THEN Online_Interaction.user_id END) * 100.0 / COUNT(DISTINCT Online_Interaction.user_id)) as percentage FROM Online_Interaction;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1}
{"question": "What is the total number of patients that have completed their treatment for each condition?", "schema": "CREATE TABLE TreatmentOutcomes (TreatmentID int, ConditionID int, Completed int); INSERT INTO TreatmentOutcomes (TreatmentID, ConditionID, Completed) VALUES (1, 1, 1), (2, 1, 0), (3, 2, 1);", "sql": "SELECT Conditions.Condition, SUM(TreatmentOutcomes.Completed) FROM TreatmentOutcomes JOIN Conditions ON TreatmentOutcomes.ConditionID = Conditions.ConditionID GROUP BY Conditions.Condition;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1}
{"question": "PL/pgSQL test: Pltcl Queries (example 64).", "schema": null, "sql": "select tcl_spi_exec(false, 'continue');", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Queries.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1}
{"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 129).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ 'a.*.e';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1}
{"question": "What is the average distance each marathoner ran in the Olympics?", "schema": "CREATE TABLE olympic_marathon (athlete VARCHAR(50), distance INT); INSERT INTO olympic_marathon (athlete, distance) VALUES ('Eliud Kipchoge', 42195), ('Feyisa Lilesa', 42320), ('Galen Rupp', 42200);", "sql": "SELECT AVG(distance) AS avg_distance FROM olympic_marathon;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'create_misc' (example 43).", "schema": null, "sql": "INSERT INTO f_star (class, a, c, f)\n VALUES ('f', 21, 'hi marcel'::name, '(11,44),(22,55),(33,66)'::polygon);", "explanation": "DML from PostgreSQL core regression test for Create Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1}
{"question": "What is the average lead time for each fabric supplier in the Asia-Pacific region?", "schema": "CREATE TABLE suppliers (supplier_id INT, supplier_name VARCHAR(50), location VARCHAR(50), lead_time INT);", "sql": "SELECT location, AVG(lead_time) as avg_lead_time FROM suppliers WHERE location LIKE '%Asia-Pacific%' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: What is the sum avg/g with an effic of 858.4?", "schema": "CREATE TABLE table_name_61 (avg_g INTEGER, effic VARCHAR)", "sql": "SELECT SUM(avg_g) FROM table_name_61 WHERE effic = 858.4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1}
{"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 214).", "schema": null, "sql": "select * from exists_tbl t1\n where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0);", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from exists_tbl t1\n where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 70).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (1,2,'-994877526002806872754342148663997.64812998474240514147207095573950146764154822009863493316394610578375247334825932838513167168342610420582834742950389452212867974756590355021495169819086060202117180229196935525386766373096687306110481009743118940565957556492470398904849289222365256698601073536111216152709126800604695001949246634784573028721762079936564434050796321975774729383704426321489070979168993853338252728216162346796960170352897972568238870481118474064783391570102958474141459619245240874849766946530000977144965');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 563, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Name the score for game for 25", "schema": "CREATE TABLE table_17103729_8 (score VARCHAR, game VARCHAR)", "sql": "SELECT score FROM table_17103729_8 WHERE game = 25;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1}
{"question": "PL/pgSQL test: Plperl (example 61).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION foo_good() RETURNS SETOF footype AS $$\nreturn [\n {x => 1, y => 2},\n {x => 3, y => 4}\n];\n$$ LANGUAGE plperl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 140, "num_statements": 2}
{"question": "What is the average number of training hours for employees in the 'Marketing' department who have completed diversity and inclusion training?", "schema": "CREATE TABLE Employee_Training (Employee_ID INT, Employee_Name VARCHAR(50), Department VARCHAR(50), Training_Type VARCHAR(50), Hours_Spent DECIMAL(5,2)); INSERT INTO Employee_Training (Employee_ID, Employee_Name, Department, Training_Type, Hours_Spent) VALUES (6, 'Alex Johnson', 'Marketing', 'Diversity and Inclusion', 5.00), (7, 'Taylor Lee', 'Marketing', 'Diversity and Inclusion', 4.00), (8, 'Jasmine Brown', 'Marketing', 'Cybersecurity', 7.00);", "sql": "SELECT AVG(Hours_Spent) FROM Employee_Training WHERE Department = 'Marketing' AND Training_Type = 'Diversity and Inclusion';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'select_implicit' (example 10).", "schema": null, "sql": "INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');", "explanation": "DML from PostgreSQL core regression test for Select Implicit.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1}
{"question": "What is the percentage of climate finance that went to climate adaptation projects in South America between 2010 and 2015?", "schema": "CREATE TABLE climate_finance (region VARCHAR(255), year INT, project_type VARCHAR(255), amount FLOAT);", "sql": "SELECT (SUM(CASE WHEN project_type = 'climate adaptation' THEN amount ELSE 0 END) / SUM(amount)) * 100 AS adaptation_percentage FROM climate_finance WHERE year BETWEEN 2010 AND 2015 AND region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1}
{"question": "What is the maximum property price in each neighborhood in San Francisco?", "schema": "CREATE TABLE sf_neighborhoods (id INT, name VARCHAR(50)); INSERT INTO sf_neighborhoods (id, name) VALUES (1, 'Mission'), (2, 'Castro'), (3, 'Haight-Ashbury'); CREATE TABLE properties (id INT, neighborhood_id INT, price INT); INSERT INTO properties (id, neighborhood_id, price) VALUES (1, 1, 500000), (2, 2, 400000), (3, 1, 600000);", "sql": "SELECT properties.neighborhood_id, MAX(properties.price) FROM properties GROUP BY properties.neighborhood_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1}
{"question": "What is the minimum installed capacity of wind turbines for all renewable energy projects in Germany?", "schema": "CREATE TABLE renewable_energy (project_name VARCHAR(50), country VARCHAR(50), wind_capacity INT); INSERT INTO renewable_energy (project_name, country, wind_capacity) VALUES ('Project1', 'Germany', 5000), ('Project2', 'Germany', 12000), ('Project3', 'Germany', 8000);", "sql": "SELECT MIN(wind_capacity) FROM renewable_energy WHERE country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1}
{"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 376).", "schema": null, "sql": "select jsonb_path_query('0', '$.boolean()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('0', '$.boolean()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1}
{"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 515).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION function_returns( NAME, NAME[], TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1}
{"question": "Generate PostgreSQL SQL for: Who is the opponent in the final on 24 November 2008?", "schema": "CREATE TABLE table_name_43 (opponent_in_the_final VARCHAR, date VARCHAR)", "sql": "SELECT opponent_in_the_final FROM table_name_43 WHERE date = '24 november 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 795).", "schema": null, "sql": "INSERT INTO num_input_test(n1) VALUES ('');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1}
{"question": "What is the total number of military personnel involved in cybersecurity operations in Asia?", "schema": "CREATE TABLE MilitaryCyberOps (Id INT, Region VARCHAR(50), Personnel INT, Year INT); INSERT INTO MilitaryCyberOps (Id, Region, Personnel, Year) VALUES (1, 'Asia', 200, 2021); INSERT INTO MilitaryCyberOps (Id, Region, Personnel, Year) VALUES (2, 'Europe', 300, 2021);", "sql": "SELECT SUM(Personnel) FROM MilitaryCyberOps WHERE Region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1}
{"question": "What is the total amount donated and number of donations for each quarter in the 'donations' table?", "schema": "CREATE TABLE donations (donation_id INT, donation_date DATE, donation_amount FLOAT); INSERT INTO donations (donation_id, donation_date, donation_amount) VALUES (1, '2022-01-01', 50.00), (2, '2022-02-01', 100.00), (3, '2022-03-01', 150.00);", "sql": "SELECT DATE_TRUNC('quarter', donation_date) as quarter, SUM(donation_amount) as total_donation, COUNT(donation_id) as num_donations FROM donations GROUP BY quarter ORDER BY quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1}
{"question": "List the names of investors who have invested in companies that have a female founder.", "schema": "CREATE TABLE Companies (id INT, name TEXT, founder_gender TEXT); INSERT INTO Companies (id, name, founder_gender) VALUES (1, 'Daisy Enterprise', 'Female'); INSERT INTO Companies (id, name, founder_gender) VALUES (2, 'Bright Star Corp', 'Male'); CREATE TABLE Investors (id INT, name TEXT); INSERT INTO Investors (id, name) VALUES (1, 'Venture Capital 3'); INSERT INTO Investors (id, name) VALUES (2, 'Angel Investor 3');", "sql": "SELECT Investors.name FROM Companies INNER JOIN Investors ON TRUE WHERE Companies.founder_gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1}
{"question": "Write the DML statement from PostgreSQL regression test 'generated_stored' (example 54).", "schema": null, "sql": "INSERT INTO gtest1v VALUES (4, 8); -- error\nINSERT INTO gtest1v VALUES (5, DEFAULT); -- ok\nINSERT INTO gtest1v VALUES (6, 66), (7, 77); -- error\nINSERT INTO gtest1v VALUES (6, DEFAULT), (7, 77); -- error\nINSERT INTO gtest1v VALUES (6, 66), (7, DEFAULT); -- error\nINSERT INTO gtest1v VALUES (6, DEFAULT), (7, DEFAULT); -- ok\n\nALTER VIEW gtest1v ALTER COLUMN b SET DEFAULT 100;", "explanation": "DML from PostgreSQL core regression test for Generated Stored.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 381, "num_statements": 7}
{"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 72).", "schema": null, "sql": "SELECT xmlroot(xml '