DeanGumas committed on
Commit
825751a
·
1 Parent(s): 15c124e

added scripts for combining training data into single tsv

Browse files
training-data/combined_dataset.tsv ADDED
The diff for this file is too large to render. See raw diff
 
training-data/{train_set.tsv → nba_train_set.tsv} RENAMED
File without changes
training-data/tennis_train_set.tsv ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ natural_query sql_query result
2
+ Get the full names of all players taller than 210 cm. SELECT name FROM players WHERE height > 210; Reilly|Opelka
3
+ How many players are left-handed? SELECT COUNT(*) FROM players WHERE hand = 'L'; 1435
4
+ In how many matches was the winner’s age greater than 30? SELECT COUNT(*) FROM matches WHERE winner_age > 30; 72400
5
+ Retrieve the names of tournaments played in 2021 SELECT DISTINCT tourney_name FROM matches WHERE tourney_date BETWEEN 20210101 AND 20211231; Delray Beach Antalya Doha Aus Open Qualies Cordoba Singapore Montpellier Buenos Aires Rotterdam Doha Santiago Marseille Acapulco Dubai Miami Masters Cagliari Marbella Monte Carlo Masters Barcelona Belgrade Estoril Munich Madrid Masters Rome Masters Geneva Lyon Belgrade 2 Parma Roland Garros Stuttgart Halle Queen's Club Eastbourne Mallorca Wimbledon Bastad Hamburg Newport Gstaad Los Cabos Umag Atlanta Kitzbuhel Washington Canada Masters Cincinnati Masters Winston-Salem Us Open Metz Nur-Sultan San Diego Sofia Indian Wells Masters Moscow Antwerp Vienna St. Petersburg Paris Masters Stockholm Istanbul 1 CH Antalya 1 CH Quimper 1 CH Antalya 2 CH Quimper 2 CH Biella 1 CH Cherbourg CH Potchefstroom 1 CH Biella 2 CH Concepcion CH Potchefstroom 2 CH Las Palmas 1 CH Nur-Sultan 1 CH Las Palmas 2 CH Nur-Sultan 2 CH St. Petersburg 1 CH Biella 3 CH St. Petersburg 2 CH Biella 4 CH Cleveland CH Santiago CH Lille CH Lugano CH Zadar CH Marbella CH Oeiras 1 CH Oeiras 2 CH Split 1 CH Belgrade CH Orlando 1 CH Split 2 CH Rome 1 CH Salinas 1 CH Tallahassee CH Ostrava CH Rome 2 CH Salinas 2 CH Biella 5 CH Prague 1 CH Heilbronn CH Zagreb CH Biella 6 CH Oeiras 3 CH Oeiras 4 CH Biella 7 CH Little Rock CH Almaty 1 CH Bratislava 1 CH Lyon CH Nottingham 1 CH Orlando 2 CH Aix-En-Provence CH Almaty 2 CH Forli CH Nottingham 2 CH Prostejov CH Milan CH Porto CH Braunschweig CH Perugia CH Salzburg CH Amersfoort CH Iasi CH Nur-Sultan 3 CH Todi CH Cary 1 CH Nur-Sultan 4 CH Pozoblanco CH Tampere CH Lexington CH Poznan CH Segovia CH Trieste CH Cordenons CH Liberec CH Meerbusch CH Prague 2 CH San Marino CH Luedenscheid CH Verona CH Barletta CH Prague 3 CH Warsaw CH Como CH St. 
Tropez CH Manacor CH Banja Luka CH Cassis CH Kyiv CH Seville CH Tulln CH Cary 2 CH Istanbul 2 CH Quito CH Rennes CH Szczecin CH Columbus CH Ambato CH Biel CH Braga CH Bucharest CH Lima 1 CH Lisboa CH Murcia CH Orleans CH Sibiu CH Barcelona CH Mouilleron-Le-Captif CH Napoli CH Santiago 1 CH Ercolano CH Santiago 2 CH Villena CH Bogota CH Buenos Aires CH Losinj CH Brest CH Ismaning CH Las Vegas CH Lima 2 CH Bergamo CH Charlottesville CH Eckental CH Guayaquil CH Tenerife CH Bratislava 2 CH Knoxville CH Montevideo CH Ortisei CH Roanne CH Campinas CH Champaign CH Helsinki CH Pau CH Bari CH Brasilia CH Manama CH Puerto Vallarta CH Antalya 3 CH Forli 2 CH Sao Paulo CH Antalya 4 CH Florianopolis CH Forli 3 CH Maia 1 CH Maia 2 CH Rio De Janeiro CH Tokyo Olympics Tour Finals Australian Open NextGen Finals Atp Cup Great Ocean Road Open Murray River Open Davis Cup Finals RR: ESP vs ECU Davis Cup Finals RR: ESP vs RTF Davis Cup Finals RR: RTF vs ECU Davis Cup Finals RR: CAN vs KAZ Davis Cup Finals RR: CAN vs SWE Davis Cup Finals RR: KAZ vs SWE Davis Cup Finals RR: FRA vs CZE Davis Cup Finals RR: FRA vs GBR Davis Cup Finals RR: GBR vs CZE Davis Cup Finals RR: AUS vs HUN Davis Cup Finals RR: CRO vs AUS Davis Cup Finals RR: CRO vs HUN Davis Cup Finals RR: ITA vs COL Davis Cup Finals RR: USA vs COL Davis Cup Finals RR: USA vs ITA Davis Cup Finals RR: GER vs AUT Davis Cup Finals RR: SRB vs AUT Davis Cup Finals RR: SRB vs GER Davis Cup Finals SF: CRO vs SRB Davis Cup Finals QF: GBR vs GER Davis Cup Finals QF: ITA vs CRO Davis Cup Finals F: RTF vs CRO Davis Cup Finals SF: RTF vs GER Davis Cup Finals QF: RTF vs SWE Davis Cup Finals QF: SRB vs KAZ Davis Cup WG1 PO: UKR vs NOR Davis Cup WG1 R1: ARG vs BLR Davis Cup WG1 R1: BEL vs BOL Davis Cup WG1 R1: BIH vs PER Davis Cup WG1 R1: BRA vs LBN Davis Cup WG1 R1: CHI vs SVK Davis Cup WG1 R1: IND vs FIN Davis Cup WG1 R1: ISR vs UKR Davis Cup WG1 R1: JPN vs PAK Davis Cup WG1 R1: KOR vs NZL Davis Cup WG1 R1: NED vs URU Davis Cup WG1 R1: POR vs 
ROU Davis Cup WG1 R1: UZB vs NOR Davis Cup WG2 PO: TUN vs ZIM Davis Cup WG2 R1: BAR vs INA Davis Cup WG2 R1: DOM vs TUN Davis Cup WG2 R1: LTU vs GRE Davis Cup WG2 R1: MEX vs BUL Davis Cup WG2 R1: POL vs ESA Davis Cup WG2 R1: RSA vs VEN Davis Cup WG2 R1: SLO vs PAR Davis Cup WG2 R1: SUI vs EST Davis Cup WG2 R1: THA vs DEN Davis Cup WG2 R1: TUR vs LAT Laver Cup M15 Cairo M15 Manacor M15 Monastir M15 Antalya M15 Villa Maria M15 Cordoba M25 Villa Allende M25 Rio Cuarto M15 Bad Waltersdorf M25 Telfs M25 Kottingbrunn M15 Warmbad-Villach M15 Huy M25 Koksijde M25 Eupen M15 Brcko M25 Kiseljak M15 Sarajevo M15 Prijedor M15 Doboj M15 Cochabamba M15 Recife M15 Brasilia M25 Rio do Sul M25 Aparecida de Goiania M15 Sofia M15 Sozopol M15 Ibague M25 Medellin M15 Cundinamarca M15 Porec M15 Rovinj M15 Opatija M15 Sibenik M25 Most M25 Prague M25 Jablonec nad Nisou M25 Prostejov M25 Ricany M25 Pardubice M15 Opava M15 Ostrava M15 Vejle M15 Frederiksberg M25 Santo Domingo M15 Santo Domingo M25 Portoviejo M25 Guayaquil M15 Sharm El Sheikh M25 Villena M15 La Nucia M25 La Nucia M25 Reus M15 Valldoreix M25 Bakio M15 Marbella M15 Majadahonda (Madrid) M15 Las Palmas de Gran Canaria M15 Platja D'Aro M25 Denia M25 Vic M25 Gandia M15 Xativa M15 Girona M25 Santander M25 Oviedo M15 Melilla M25 Madrid M15 Madrid M15 Torello M15 Nules M15 Benicarlo M15 Parnu M15 Helsinki M15 Kouvola M15+H Bressuire M15 Grenoble M15 Poitiers M25 Angers M25 Montauban M25 Grasse M25 Bourg-en-Bresse M25+H Ajaccio M25 Uriage M25 Bagneres-De-Bigorre M25+H Plaisir M15 Forbach M25 Nevers M25+H Rodez M25 Toulouse M25 Sarreguemines M25 Villers Les Nancy M25 Saint Dizier M15 Tbilisi M25 Telavi M15 Telavi M25 Meerbusch M15 Troisdorf M25 Frankfurt am Main M25 Wetzlar M25 Marburg M25 Trier M25 Ueberlingen M15 Allershausen M25 Hamburg M15 Heraklion M15 Guatemala M25 Budapest M15 Indore M15 Lucknow M15 Pune M15 New Delhi M15 Gurugram M15 Ramat Hasharon M15 Jerusalem M25 Meitar M25 Afula M15 L'Aquila M15 Gaiba M15 Bergamo M15 Genova 
M25 Casinalbo M15 Perugia M25+H Lesa M25 Bolzano M15 Pescara M15 Selva Gardena M15 Nur-Sultan M15 Shymkent M25 Nur-Sultan M15 EschAlzette M15 Cancun M15 Skopje M25 Skopje M15 Ulcinj M25 The Hague M25 Alkmaar M15 Oldenzaal M15 Lambare M25 Lima M25 Wroclaw M25 Grodzisk Mazowiecki M25 Poznan M15 Gdynia M15 Lodz M25 Vale do Lobo M25 Faro M25 Idanha-a-Nova M15 Castelo Branco M15 Almada M25 Sintra M25 Setubal M25 Loule M25 Quinta Do Lago M25 Portimao M15 Doha M15 Bucharest M15 Curtea de Arges M25 Pitesti M25+H Bacau M25 Johannesburg Markspark M25 Johannesburg Ellispark M25 Pretoria M15 St. Petersburg M15 Kazan M25 Velenje M25 Belgrade M15 Novi Sad M15 Zlatibor M15 Pirot M25 Trimbach M25 Biel M25 Klosters M25 Sierre M25 Muttenz M25 Caslano M15 Bratislava M15 Poprad M15 Zilina M25 Jonkoping M25 Falun M15 Novomoskovsk M15 Chornomorsk M15 Vyshkovo M25 Naples FL M25 Pensacola FL M15 Weston FL M25 Tulsa OK M15 Champaign IL M25 Wichita KS M15 Edwardsville IL M25 Champaign IL M25 Decatur IL M15 Fayetteville AR M15 Lubbock TX M15 Ithaca NY M15 Vero Beach FL M25 Calabasas CA M15 Tallahassee FL M15 Naples FL M25 Harlingen TX M25 Austin TX M25 Columbus OH M15 East Lansing MI
6
+ Find the number of matches that lasted more than 180 minutes SELECT COUNT(*) FROM matches WHERE minutes > 180; 5425
7
+ What is the average height of players from ESP? SELECT AVG(height) FROM players WHERE ioc = 'ESP'; 180.697183098592
8
+ Find the average age of losers in all matches SELECT AVG(loser_age) FROM matches; 23.6776674381365
9
+ List the first names of players born after the year 2008. SELECT name_first FROM players WHERE dob > 20080101; Vito Antonio
10
+ How many players are taller than the average height? SELECT COUNT(*) FROM players WHERE height > (SELECT AVG(height) FROM players); 1366
11
+ Which player had the most match wins? SELECT winner_name, COUNT(*) AS wins FROM matches GROUP BY winner_name ORDER BY wins DESC LIMIT 1; |26399
12
+ Find the average match duration for all tournaments in 2022 SELECT AVG(minutes) FROM matches WHERE tourney_date BETWEEN 20220101 AND 20221231; 104.704628353733
13
+ How many matches included a score containing "6-0"? SELECT COUNT(*) FROM matches WHERE score LIKE '%6-0%'; 94269
14
+ What is the average number of minutes per match in 2023? SELECT AVG(minutes) FROM matches WHERE tourney_date BETWEEN 20230101 AND 20231231; 106.879613289342
15
+ What is the average age of winners in the year 2021? SELECT AVG(winner_age) FROM matches WHERE tourney_date BETWEEN 20210101 AND 20211231; 24.6910463323964
16
+ How many players were born before 1990? SELECT COUNT(*) FROM players WHERE dob < 19900101; 29388
17
+ Find the player with the highest total points over all ranking dates. SELECT player, SUM(Points) AS total_points FROM rankings GROUP BY player ORDER BY total_points DESC LIMIT 1; 104925|7563461.0
18
+ Count how many matches were won by players over 35 years old. SELECT COUNT(*) FROM matches WHERE winner_age > 35; 9305
19
+ What is the minimum height recorded among all players? SELECT MIN(height) FROM players; 145.0
20
+ Which country has the most players? SELECT ioc, COUNT(*) AS num_players FROM players GROUP BY ioc ORDER BY num_players DESC LIMIT 1; USA|13102
21
+ What is the average height difference between winners and losers? SELECT AVG(winner_ht - loser_ht) AS avg_height_diff FROM matches; 0.23040674261838
22
+ Count the number of matches that lasted exactly 5 sets. SELECT COUNT(*) FROM matches WHERE best_of = '5' AND score LIKE '% % % %'; 30219
23
+ What is the maximum number of minutes played in a single match? SELECT MAX(minutes) FROM matches; 4756.0
24
+ Which tournament had the longest average match duration? SELECT tourney_name, AVG(minutes) AS avg_duration FROM matches GROUP BY tourney_name ORDER BY avg_duration DESC LIMIT 1; Davis Cup WG R1: ITA vs JPN|230.666666666667
25
+ List the three youngest players. SELECT name, dob FROM players ORDER BY dob DESC LIMIT 3; Vito Antonio|20080113.0 Linus|20071210.0 Jerry Christopher|20071203.0
26
+ Count how many matches ended with the score “6-0 6-0”. SELECT COUNT(*) FROM matches WHERE score = '6-0 6-0'; 4197
27
+ How many distinct players have appeared in matches as winners? SELECT COUNT(DISTINCT winner_id) FROM matches; 18648
28
+ Find the country with the tallest average player height. SELECT ioc, AVG(height) AS avg_height FROM players GROUP BY ioc ORDER BY avg_height DESC LIMIT 1; YUG|194.0
29
+ What is the most common match duration (in minutes)? SELECT minutes, COUNT(*) AS freq FROM matches GROUP BY minutes ORDER BY freq DESC LIMIT 1; |748394
30
+ Calculate the average number of matches per tournament. SELECT AVG(match_count) FROM ( SELECT tourney_id, COUNT(*) AS match_count FROM matches GROUP BY tourney_id ); 30.672535439187
31
+ How many matches were played at Wimbledon in 2022? SELECT COUNT(*) FROM matches WHERE tourney_name = 'Wimbledon' AND tourney_date BETWEEN 20220101 AND 20221231; 239
32
+ What is the average duration of matches at the US Open? SELECT AVG(minutes) FROM matches WHERE tourney_name = 'US Open'; 135.031421838178
33
+ List the top 3 tournaments by average match length SELECT tourney_name, AVG(minutes) AS avg_duration FROM matches GROUP BY tourney_name ORDER BY avg_duration DESC LIMIT 3; Davis Cup WG R1: ITA vs JPN|230.666666666667 Davis Cup WG R1: ARG vs ITA|208.0 Davis Cup WG F: ARG vs CRO|207.5
34
+ How many matches has Roger Federer won? SELECT COUNT(*) FROM matches WHERE winner_name = 'Roger Federer'; 1305
35
+ How many matches has Rafael Nadal lost? SELECT COUNT(*) FROM matches WHERE loser_name = 'Rafael Nadal'; 255
36
+ What is Roger Federer’s average match duration? SELECT AVG(minutes) FROM matches WHERE winner_name = 'Roger Federer' OR loser_name = 'Roger Federer'; 101.931882022472
37
+ In how many different tournaments did Pete Sampras play? SELECT COUNT(DISTINCT tourney_name) FROM matches WHERE winner_name = 'Pete Sampras' OR loser_name = 'Pete Sampras'; 82
38
+ What is the average winner age at Wimbledon? SELECT AVG(winner_age) FROM matches WHERE tourney_name = 'Wimbledon'; 26.7238638689215
39
+ How many matches at the US Open lasted more than 240 minutes? SELECT COUNT(*) FROM matches WHERE tourney_name = 'US Open' AND minutes > 240; 94
40
+ Find the average height of players who participated in Wimbledon. SELECT AVG(height) FROM players WHERE player_id IN ( SELECT winner_id FROM matches WHERE tourney_name = 'Wimbledon' UNION SELECT loser_id FROM matches WHERE tourney_name = 'Wimbledon' ); 184.329136690647
41
+ Count the number of matches where both players were from the same country at the US Open. SELECT COUNT(*) FROM matches WHERE tourney_name = 'US Open' AND winner_ioc = loser_ioc; 6604
42
+ How many matches did Roger Federer and Rafael Nadal play against each other? SELECT COUNT(*) FROM matches WHERE (winner_name = 'Roger Federer' AND loser_name = 'Rafael Nadal') OR (winner_name = 'Rafael Nadal' AND loser_name = 'Roger Federer'); 41
43
+ What is the average loser age in best-of-5 matches at Wimbledon? SELECT AVG(loser_age) FROM matches WHERE tourney_name = 'Wimbledon' AND best_of = '5'; 26.8972819437329
44
+ Count the number of matches played in 2023 by Rafael Nadal. SELECT COUNT(*) FROM matches WHERE (winner_name = 'Rafael Nadal' OR loser_name = 'Rafael Nadal') AND tourney_date BETWEEN 20230101 AND 20231231; 4
45
+ Find the average points for Pete Sampras across all rankings. SELECT AVG(Points) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Pete Sampras';
46
+ What is the earliest tournament date in the database? SELECT MIN(tourney_date) FROM matches; 163.0
47
+ What is the average height of all Wimbledon winners? SELECT AVG(winner_ht) FROM matches WHERE tourney_name = 'Wimbledon'; 185.318105616094
48
+ Who is the most recent US Open winner? SELECT winner_name FROM matches WHERE tourney_name = 'US Open' ORDER BY tourney_date DESC LIMIT 1; Novak Djokovic
49
+ How many different countries have players who won matches at the US Open? SELECT COUNT(DISTINCT winner_ioc) FROM matches WHERE tourney_name = 'US Open'; 90
50
+ Count the number of tournaments Roger Federer has participated in since 2015. SELECT COUNT(DISTINCT tourney_id) FROM matches WHERE (winner_name = 'Roger Federer' OR loser_name = 'Roger Federer') AND tourney_date >= 20150101; 73
51
+ What is the average height of Wimbledon losers? SELECT AVG(loser_ht) FROM matches WHERE tourney_name = 'Wimbledon'; 184.666780045351
52
+ Get the total number of matches Rafael Nadal has played on or after 2021. SELECT COUNT(*) FROM matches WHERE (winner_name = 'Rafael Nadal' OR loser_name = 'Rafael Nadal') AND tourney_date >= 20210101; 93
53
+ Find the tournament with the fewest matches overall. SELECT tourney_name, COUNT(*) AS match_count FROM matches GROUP BY tourney_name ORDER BY match_count ASC LIMIT 1; Cannes Chps|1
54
+ How many matches did Pete Sampras play at Wimbledon? SELECT COUNT(*) FROM matches WHERE tourney_name = 'Wimbledon' AND (winner_name = 'Pete Sampras' OR loser_name = 'Pete Sampras'); 70
55
+ What was Roger Federer’s average age when winning matches at Wimbledon? SELECT AVG(winner_age) FROM matches WHERE tourney_name = 'Wimbledon' AND winner_name = 'Roger Federer'; 29.3160377358491
56
+ Find the total number of distinct winners at Wimbledon. SELECT COUNT(DISTINCT winner_name) FROM matches WHERE tourney_name = 'Wimbledon'; 2706
57
+ What is the average match duration for matches between 2010 and 2020 at the US Open? SELECT AVG(minutes) FROM matches WHERE tourney_name = 'US Open' AND tourney_date BETWEEN 20100101 AND 20201231; 130.184703433923
58
+ What is the average age of Novak Djokovic when he won his matches? SELECT AVG(winner_age) FROM matches WHERE winner_name = 'Novak Djokovic'; 26.5260390161154
59
+ How many matches has Novak Djokovic lost at Wimbledon? SELECT COUNT(*) FROM matches WHERE loser_name = 'Novak Djokovic' AND tourney_name = 'Wimbledon'; 11
60
+ What is the total number of matches played by Andy Murray in the US Open? SELECT COUNT(*) FROM matches WHERE winner_name = 'Andy Murray' OR loser_name = 'Andy Murray' AND tourney_name = 'US Open'; 853
61
+ What is the average match duration for Rafael Nadal at Wimbledon? SELECT AVG(minutes) FROM matches WHERE tourney_name = 'Wimbledon' AND (winner_name = 'Rafael Nadal' OR loser_name = 'Rafael Nadal'); 159.455882352941
62
+ How many matches did Roger Federer win in 2019? SELECT COUNT(*) FROM matches WHERE winner_name = 'Roger Federer' AND tourney_date BETWEEN 20190101 AND 20191231; 55
63
+ How many matches did Andre Agassi lose in the US Open? SELECT COUNT(*) FROM matches WHERE tourney_name = 'US Open' AND loser_name = 'Andre Agassi'; 19
64
+ What is the average height of all Wimbledon winners? SELECT AVG(winner_ht) FROM matches WHERE tourney_name = 'Wimbledon'; 185.318105616094
65
+ How many matches lasted longer than 200 minutes at the US Open? SELECT COUNT(*) FROM matches WHERE tourney_name = 'US Open' AND minutes > 200; 404
66
+ What is the total number of wins by Novak Djokovic against Rafael Nadal? SELECT COUNT(*) FROM matches WHERE winner_name = 'Novak Djokovic' AND loser_name = 'Rafael Nadal'; 30
67
+ What is the average height of players that lost to Roger Federer? SELECT AVG(loser_ht) FROM matches WHERE winner_name = 'Roger Federer'; 186.174961119751
68
+ How many Wimbledon matches did Andy Murray play after 2015? SELECT COUNT(*) FROM matches WHERE tourney_name = 'Wimbledon' AND tourney_date > 20150101 AND (winner_name = 'Andy Murray' OR loser_name = 'Andy Murray'); 25
69
+ How many matches at the US Open lasted less than 90 minutes? SELECT COUNT(*) FROM matches WHERE tourney_name = 'US Open' AND minutes < 90; 649
70
+ What is the average age of Wimbledon winners? SELECT AVG(winner_age) FROM matches WHERE tourney_name = 'Wimbledon'; 26.7238638689215
71
+ How many US Open matches has Novak Djokovic participated in total? SELECT COUNT(*) FROM matches WHERE tourney_name = 'US Open' AND (winner_name = 'Novak Djokovic' OR loser_name = 'Novak Djokovic'); 84
72
+ How many matches did Pete Sampras win when he was under 25 years old? SELECT COUNT(*) FROM matches WHERE winner_name = 'Pete Sampras' AND winner_age < 25; 480
73
+ What is the shortest match played by Novak Djokovic? SELECT MIN(minutes) FROM matches WHERE winner_name = 'Novak Djokovic' OR loser_name = 'Novak Djokovic'; 0.0
74
+ How many matches did Andre Agassi win in less than 100 minutes? SELECT COUNT(*) FROM matches WHERE winner_name = 'Andre Agassi' AND minutes < 100; 417
75
+ What is Roger Federer’s average age when winning matches at Wimbledon? SELECT AVG(winner_age) FROM matches WHERE winner_name = 'Roger Federer' AND tourney_name = 'Wimbledon'; 29.3160377358491
76
+ How many matches did Andy Murray lose at the US Open after 2015? SELECT COUNT(*) FROM matches WHERE loser_name = 'Andy Murray' AND tourney_name = 'US Open' AND tourney_date > 20150101; 3
77
+ What is the average height of players who defeated Roger Federer? SELECT AVG(winner_ht) FROM matches WHERE loser_name = 'Roger Federer'; 186.934482758621
78
+ How many matches did Pete Sampras win in the Australian Open? SELECT COUNT(*) FROM matches WHERE winner_name = 'Pete Sampras' AND tourney_name = 'Australian Open'; 45
79
+ Show how many wins Andy Murray has on each surface SELECT surface, COUNT(*) AS wins FROM matches WHERE winner_name = 'Andy Murray' GROUP BY surface; Carpet|10 Clay|143 Grass|135 Hard|553
80
+ What is Andre Agassi’s average number of minutes per match at the US Open? SELECT AVG(minutes) FROM matches WHERE (winner_name = 'Andre Agassi' OR loser_name = 'Andre Agassi') AND tourney_name = 'US Open'; 130.436363636364
81
+ What is the average height of all US Open winners? SELECT AVG(winner_ht) FROM matches WHERE tourney_name = 'US Open'; 184.635440803266
82
+ How many five-set matches has Novak Djokovic played at Wimbledon? SELECT COUNT(*) FROM matches WHERE (winner_name = 'Novak Djokovic' OR loser_name = 'Novak Djokovic') AND tourney_name = 'Wimbledon' AND best_of = 5; 104
83
+ What is the average age difference between winners and losers at Wimbledon? SELECT AVG(winner_age - loser_age) FROM matches WHERE tourney_name = 'Wimbledon'; -0.234355556006958
84
+ Which player has defeated Rafael Nadal the most? SELECT winner_name, COUNT(*) AS wins_against FROM matches WHERE loser_name = 'Rafael Nadal' GROUP BY winner_name ORDER BY wins_against DESC LIMIT 1; Novak Djokovic|30
85
+ What is John McEnroe’s total number of wins at the US Open? SELECT COUNT(*) FROM matches WHERE winner_name = 'John McEnroe' AND tourney_name = 'US Open'; 66
86
+ Which player has beaten Roger Federer the most times? SELECT winner_name, COUNT(*) AS wins_against FROM matches WHERE loser_name = 'Roger Federer' GROUP BY winner_name ORDER BY wins_against DESC LIMIT 1; Novak Djokovic|28
87
+ How many times has Novak Djokovic defeated Andy Murray? SELECT COUNT(*) FROM matches WHERE winner_name = 'Novak Djokovic' AND loser_name = 'Andy Murray'; 26
88
+ Who has John McEnroe defeated the most? SELECT loser_name, COUNT(*) AS wins FROM matches WHERE winner_name = 'John McEnroe' GROUP BY loser_name ORDER BY wins DESC LIMIT 1; Jimmy Connors|21
89
+ How many losses does Pete Sampras have at Wimbledon? SELECT COUNT(*) FROM matches WHERE loser_name = 'Pete Sampras' AND tourney_name = 'Wimbledon'; 7
90
+ What is the total number of matches Roger Federer played at the Australian Open? SELECT COUNT(*) FROM matches WHERE (winner_name = 'Roger Federer' OR loser_name = 'Roger Federer') AND tourney_name = 'Australian Open'; 118
91
+ How many times has Andy Murray beaten Novak Djokovic? SELECT COUNT(*) FROM matches WHERE winner_name = 'Andy Murray' AND loser_name = 'Novak Djokovic'; 11
92
+ What is the total number of matches Rafael Nadal played at Wimbledon? SELECT COUNT(*) FROM matches WHERE (winner_name = 'Rafael Nadal' OR loser_name = 'Rafael Nadal') AND tourney_name = 'Wimbledon'; 71
93
+ Who has Andre Agassi defeated the most times? SELECT loser_name, COUNT(*) AS wins FROM matches WHERE winner_name = 'Andre Agassi' GROUP BY loser_name ORDER BY wins DESC LIMIT 1; Michael Chang|15
94
+ What is the total number of matches Novak Djokovic lost at the US Open? SELECT COUNT(*) FROM matches WHERE loser_name = 'Novak Djokovic' AND tourney_name = 'US Open'; 11
95
+ Which player defeated Andy Murray the most? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Andy Murray' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Novak Djokovic|26
96
+ How many times has Roger Federer defeated Rafael Nadal? SELECT COUNT(*) FROM matches WHERE winner_name = 'Roger Federer' AND loser_name = 'Rafael Nadal'; 17
97
+ What is the total number of wins Pete Sampras has at the US Open? SELECT COUNT(*) FROM matches WHERE winner_name = 'Pete Sampras' AND tourney_name = 'US Open'; 71
98
+ How many matches did Andre Agassi win at Wimbledon? SELECT COUNT(*) FROM matches WHERE winner_name = 'Andre Agassi' AND tourney_name = 'Wimbledon'; 46
99
+ How many times did Novak Djokovic beat Roger Federer at Wimbledon? SELECT COUNT(*) FROM matches WHERE winner_name = 'Novak Djokovic' AND loser_name = 'Roger Federer' AND tourney_name = 'Wimbledon'; 3
100
+ Who has beaten Rafael Nadal the most at Wimbledon? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Rafael Nadal' AND tourney_name = 'Wimbledon' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Roger Federer|3
101
+ What is the total number of wins John McEnroe has at Wimbledon? SELECT COUNT(*) FROM matches WHERE winner_name = 'John McEnroe' AND tourney_name = 'Wimbledon'; 59
102
+ How many total losses does Andre Agassi have at the Australian Open? SELECT COUNT(*) FROM matches WHERE loser_name = 'Andre Agassi' AND tourney_name = 'Australian Open'; 5
103
+ How many matches did Novak Djokovic win at the Roland Garros? SELECT COUNT(*) FROM matches WHERE winner_name = 'Novak Djokovic' AND tourney_name = 'Roland Garros'; 96
104
+ Which player has defeated Pete Sampras the most? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Pete Sampras' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Andre Agassi|14
105
+ How many times has Andy Murray defeated Roger Federer? SELECT COUNT(*) FROM matches WHERE winner_name = 'Andy Murray' AND loser_name = 'Roger Federer'; 11
106
+ How many times has Carlos Alcaraz defeated Novak Djokovic? SELECT COUNT(*) FROM matches WHERE winner_name = 'Carlos Alcaraz' AND loser_name = 'Novak Djokovic'; 2
107
+ What is the number of wins Rafael Nadal has at the Australian Open? SELECT COUNT(*) FROM matches WHERE winner_name = 'Rafael Nadal' AND tourney_name = 'Australian Open'; 77
108
+ What is the number of matches Novak Djokovic played in 2019? SELECT COUNT(*) FROM matches WHERE (winner_name = 'Novak Djokovic' OR loser_name = 'Novak Djokovic') AND tourney_date BETWEEN 20190101 AND 20191231; 65
109
+ Which player defeated Andre Agassi the most at the US Open? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Andre Agassi' AND tourney_name = 'US Open' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Pete Sampras|4
110
+ What is the total number of losses Roger Federer has at the Roland Garros? SELECT COUNT(*) FROM matches WHERE loser_name = 'Roger Federer' AND tourney_name = 'Roland Garros'; 18
111
+ What is the total number of losses Jannik Sinner has at the Roland Garros? SELECT COUNT(*) FROM matches WHERE loser_name = 'Jannik Sinner' AND tourney_name = 'Roland Garros'; 4
112
+ What is the total number of losses Nick Kyrgios has at the Roland Garros? SELECT COUNT(*) FROM matches WHERE loser_name = 'Nick Kyrgios' AND tourney_name = 'Roland Garros'; 5
113
+ What is the total number of losses Alexander Zverev has at the Roland Garros? SELECT COUNT(*) FROM matches WHERE loser_name = 'Alexander Zverev' AND tourney_name = 'Roland Garros'; 9
114
+ What is the total number of losses Taylor Fritz has at the Roland Garros? SELECT COUNT(*) FROM matches WHERE loser_name = 'Taylor Fritz' AND tourney_name = 'Roland Garros'; 7
115
+ How many matches has Carlos Alcaraz won at Wimbledon? SELECT COUNT(*) FROM matches WHERE winner_name = 'Carlos Alcaraz' AND tourney_name = 'Wimbledon'; 11
116
+ Which player has defeated Jannik Sinner the most times? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Jannik Sinner' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Stefanos Tsitsipas|6
117
+ How many times has Nick Kyrgios beaten Rafael Nadal? SELECT COUNT(*) FROM matches WHERE winner_name = 'Nick Kyrgios' AND loser_name = 'Rafael Nadal'; 4
118
+ What is Alexander Zverev’s total number of wins at the US Open? SELECT COUNT(*) FROM matches WHERE winner_name = 'Alexander Zverev' AND tourney_name = 'US Open'; 11
119
+ How many matches has Taylor Fritz lost at Wimbledon? SELECT COUNT(*) FROM matches WHERE loser_name = 'Taylor Fritz' AND tourney_name = 'Wimbledon'; 7
120
+ Who has defeated Novak Djokovic the most at the US Open? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Novak Djokovic' AND tourney_name = 'US Open' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Roger Federer|3
121
+ How many matches did Roger Federer win at Wimbledon after 2010? SELECT COUNT(*) FROM matches WHERE winner_name = 'Roger Federer' AND tourney_name = 'Wimbledon' AND tourney_date >= 20100101; 54
122
+ How many wins does Jannik Sinner have against Carlos Alcaraz? SELECT COUNT(*) FROM matches WHERE winner_name = 'Jannik Sinner' AND loser_name = 'Carlos Alcaraz'; 4
123
+ How many victories does Rafael Nadal have over Alexander Zverev? SELECT COUNT(*) FROM matches WHERE winner_name = 'Rafael Nadal' AND loser_name = 'Alexander Zverev'; 7
124
+ What is the total number of matches Nick Kyrgios played at the Australian Open? SELECT COUNT(*) FROM matches WHERE (winner_name = 'Nick Kyrgios' OR loser_name = 'Nick Kyrgios') AND tourney_name = 'Australian Open'; 28
125
+ Who has Carlos Alcaraz defeated the most? SELECT loser_name, COUNT(*) AS wins FROM matches WHERE winner_name = 'Carlos Alcaraz' GROUP BY loser_name ORDER BY wins DESC LIMIT 1; Stefanos Tsitsipas|5
126
+ What is the total number of wins Jannik Sinner has at the Roland Garros? SELECT COUNT(*) FROM matches WHERE winner_name = 'Jannik Sinner' AND tourney_name = 'Roland Garros'; 11
127
+ How many times has Nick Kyrgios beaten Novak Djokovic? SELECT COUNT(*) FROM matches WHERE winner_name = 'Nick Kyrgios' AND loser_name = 'Novak Djokovic'; 2
128
+ How many matches did Alexander Zverev win in 2022? SELECT COUNT(*) FROM matches WHERE winner_name = 'Alexander Zverev' AND tourney_date BETWEEN 20220101 AND 20221231; 29
129
+ Which player has Carlos Alcaraz lost to the most? SELECT winner_name, COUNT(*) AS losses FROM matches WHERE loser_name = 'Carlos Alcaraz' GROUP BY winner_name ORDER BY losses DESC LIMIT 1; Alexander Zverev|5
130
+ What is Taylor Fritz’s total number of wins at the US Open? SELECT COUNT(*) FROM matches WHERE winner_name = 'Taylor Fritz' AND tourney_name = 'US Open'; 3
131
+ How many matches did Rafael Nadal win at Wimbledon after 2015? SELECT COUNT(*) FROM matches WHERE winner_name = 'Rafael Nadal' AND tourney_name = 'Wimbledon' AND tourney_date >= 20150101; 19
132
+ Who has defeated Alexander Zverev the most at the Australian Open? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Alexander Zverev' AND tourney_name = 'Australian Open' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Rafael Nadal|1
133
+ How many times has Jannik Sinner defeated Novak Djokovic? SELECT COUNT(*) FROM matches WHERE winner_name = 'Jannik Sinner' AND loser_name = 'Novak Djokovic'; 3
134
+ How many matches did Carlos Alcaraz lose at the US Open? SELECT COUNT(*) FROM matches WHERE loser_name = 'Carlos Alcaraz' AND tourney_name = 'US Open'; 0
135
+ How many wins does John McEnroe have against Jimmy Connors? SELECT COUNT(*) FROM matches WHERE winner_name = 'John McEnroe' AND loser_name = 'Jimmy Connors'; 21
136
+ Which player defeated Nick Kyrgios the most times? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Nick Kyrgios' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Roger Federer|7
137
+ How many matches has Roger Federer lost to Andy Murray? SELECT COUNT(*) FROM matches WHERE loser_name = 'Roger Federer' AND winner_name = 'Andy Murray'; 11
138
+ How many matches did Taylor Fritz win on grass courts? SELECT COUNT(*) FROM matches WHERE winner_name = 'Taylor Fritz' AND surface = 'Grass'; 32
139
+ Who has beaten Rafael Nadal the most at the US Open? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE loser_name = 'Rafael Nadal' AND tourney_name = 'US Open' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Juan Martin del Potro|2
140
+ What is the highest number of minutes Rafael Nadal has played in a single US Open match? SELECT MAX(minutes) FROM matches WHERE (winner_name = 'Rafael Nadal' OR loser_name = 'Rafael Nadal') AND tourney_name = 'US Open'; 290.0
141
+ What is Jannik Sinner’s average age during matches at the Australian Open? SELECT AVG(CASE WHEN winner_name = 'Jannik Sinner' THEN winner_age ELSE loser_age END) FROM matches WHERE (winner_name = 'Jannik Sinner' OR loser_name = 'Jannik Sinner') AND tourney_name = 'Australian Open'; 21.0842105263158
142
+ How many matches has Alexander Zverev played that lasted more than 200 minutes? SELECT COUNT(*) FROM matches WHERE (winner_name = 'Alexander Zverev' OR loser_name = 'Alexander Zverev') AND minutes > 200; 34
143
+ What is the average height of players Nick Kyrgios defeated? SELECT AVG(loser_ht) FROM matches WHERE winner_name = 'Nick Kyrgios'; 186.636363636364
144
+ What is the maximum age at which Roger Federer won a match? SELECT MAX(winner_age) FROM matches WHERE winner_name = 'Roger Federer'; 39.8
145
+ What is the minimum age at which Rafael Nadal lost a match? SELECT MIN(loser_age) FROM matches WHERE loser_name = 'Rafael Nadal'; 15.2
146
+ What is the average ranking of players defeated by Novak Djokovic? SELECT AVG(r.rank) FROM matches m JOIN rankings r ON m.loser_id = r.player WHERE m.winner_name = 'Novak Djokovic'; 212.317855446654
147
+ What is Taylor Fritz’s average match duration at the US Open? SELECT AVG(minutes) FROM matches WHERE (winner_name = 'Taylor Fritz' OR loser_name = 'Taylor Fritz') AND tourney_name = 'US Open'; 160.75
148
+ What is the maximum number of minutes John McEnroe played at Wimbledon? SELECT MAX(minutes) FROM matches WHERE (winner_name = 'John McEnroe' OR loser_name = 'John McEnroe') AND tourney_name = 'Wimbledon'; 249.0
149
+ What is the average age of Alexander Zverev when losing matches? SELECT AVG(loser_age) FROM matches WHERE loser_name = 'Alexander Zverev'; 20.9510638297872
150
+ What is the minimum height of any player who defeated Roger Federer? SELECT MIN(winner_ht) FROM matches WHERE loser_name = 'Roger Federer'; 173.0
151
+ What is the maximum height of any opponent Jannik Sinner has beaten? SELECT MAX(loser_ht) FROM matches WHERE winner_name = 'Jannik Sinner'; 211.0
152
+ What is the highest match duration in any match involving Nick Kyrgios? SELECT MAX(minutes) FROM matches WHERE (winner_name = 'Nick Kyrgios' OR loser_name = 'Nick Kyrgios'); 266.0
153
+ What is the average height of Alexander Zverev when winning matches? SELECT AVG(winner_ht) FROM matches WHERE winner_name = 'Alexander Zverev'; 198.0
154
+ Who has the most wins at Wimbledon? SELECT winner_name, COUNT(*) AS win_count FROM matches WHERE tourney_name = 'Wimbledon' GROUP BY winner_name ORDER BY win_count DESC LIMIT 1; Roger Federer|106
155
+ Which player has the most match wins at the US Open since 2010? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE tourney_name = 'US Open' AND tourney_date >= 20100101 GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Novak Djokovic|53
156
+ What is the average match duration at Wimbledon? SELECT AVG(minutes) AS avg_duration FROM matches WHERE tourney_name = 'Wimbledon'; 130.965458422175
157
+ What is the longest match (in minutes) ever played at the US Open? SELECT MAX(minutes) FROM matches WHERE tourney_name = 'US Open'; 326.0
158
+ How many matches did Novak Djokovic win at Wimbledon? SELECT COUNT(*) FROM matches WHERE winner_name = 'Novak Djokovic' AND tourney_name = 'Wimbledon'; 95
159
+ Who has the most losses at the US Open? SELECT loser_name, COUNT(*) AS losses FROM matches WHERE tourney_name = 'US Open' GROUP BY loser_name ORDER BY losses DESC LIMIT 1; Sidney Burr Wood Jr|27
160
+ What is the average height of players who won a match at Wimbledon? SELECT AVG(winner_ht) AS avg_height FROM matches WHERE tourney_name = 'Wimbledon'; 185.318105616094
161
+ Which country has the most match winners at the US Open? SELECT winner_ioc, COUNT(*) AS wins FROM matches WHERE tourney_name = 'US Open' GROUP BY winner_ioc ORDER BY wins DESC LIMIT 1; USA|8527
162
+ What is the average age of winners at Wimbledon? SELECT AVG(winner_age) AS avg_winner_age FROM matches WHERE tourney_name = 'Wimbledon'; 26.7238638689215
163
+ What is the average number of minutes in matches that Rafael Nadal won at the US Open? SELECT AVG(minutes) FROM matches WHERE winner_name = 'Rafael Nadal' AND tourney_name = 'US Open'; 150.225806451613
164
+ What is the minimum age of any match winner at Wimbledon? SELECT MIN(winner_age) AS min_age FROM matches WHERE tourney_name = 'Wimbledon'; 14.13552361
165
+ How many matches went to “best of 5” at the US Open? SELECT COUNT(*) FROM matches WHERE tourney_name = 'US Open' AND best_of = '5'; 14144
166
+ Which player has the highest average minutes per match at Wimbledon? SELECT winner_name, AVG(minutes) AS avg_minutes FROM matches WHERE tourney_name = 'Wimbledon' GROUP BY winner_name ORDER BY avg_minutes DESC LIMIT 1; Pablo Andujar|302.0
167
+ What is the average height of losers at the US Open? SELECT AVG(loser_ht) AS avg_height FROM matches WHERE tourney_name = 'US Open'; 184.506371723972
168
+ Which player has the most match wins in the history of Wimbledon? SELECT winner_name, COUNT(*) AS wins FROM matches WHERE tourney_name = 'Wimbledon' GROUP BY winner_name ORDER BY wins DESC LIMIT 1; Roger Federer|106
169
+ Who has the most match losses at Wimbledon? SELECT loser_name, COUNT(*) AS losses FROM matches WHERE tourney_name = 'Wimbledon' GROUP BY loser_name ORDER BY losses DESC LIMIT 1; Wentworth Gore|28
170
+ What is the highest loser age ever recorded in a US Open match? SELECT MAX(loser_age) AS max_age FROM matches WHERE tourney_name = 'US Open'; 69.83983573
171
+ Which tournament (Wimbledon or US Open) has longer matches on average? SELECT tourney_name, AVG(minutes) AS avg_minutes FROM matches WHERE tourney_name IN ('Wimbledon', 'US Open') GROUP BY tourney_name ORDER BY avg_minutes DESC; US Open|135.031421838178 Wimbledon|130.965458422175
172
+ What is the best rank ever achieved by Roger Federer? SELECT MIN(rank) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Roger Federer'; 1
173
+ What was Rafael Nadal’s average ranking? SELECT AVG(rank) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Rafael Nadal'; 63.1526364477336
174
+ On how many ranking dates was Novak Djokovic ranked number 1? SELECT COUNT(*) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Novak Djokovic' AND r.rank = 1; 377
175
+ What is the earliest ranking date recorded in the rankings table? SELECT MIN(ranking_date) FROM rankings; 19730827
176
+ Which player has the highest average ranking points overall? SELECT p.name, AVG(r.points) AS avg_points FROM rankings r JOIN players p ON r.player = p.player_id GROUP BY p.name ORDER BY avg_points DESC LIMIT 1; Novak Djokovic|7655.32489878542
177
+ How many times has Pete Sampras been ranked in the top 5? SELECT COUNT(*) AS top5_count FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Pete Sampras' AND r.rank <= 5; 509
178
+ What is the maximum number of ranking points John McEnroe ever had? SELECT MAX(points) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'John McEnroe'; 1506.0
179
+ How many distinct players appear in the rankings table? SELECT COUNT(DISTINCT player) AS distinct_players FROM rankings; 16174
180
+ How many times has Alexander Zverev been ranked inside the top 10? SELECT COUNT(*) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Alexander Zverev' AND r.rank <= 10; 261
181
+ What is the highest ranking achieved by Carlos Alcaraz before 2022? SELECT MIN(rank) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Carlos Alcaraz' AND r.ranking_date < 20220101; 32
182
+ How many times was Nick Kyrgios ranked? SELECT COUNT(*) FROM rankings r JOIN players p ON r.player = p.player_id WHERE p.name = 'Nick Kyrgios'; 512
183
+ What is the maximum number of ranking points held by any player on any date? SELECT MAX(points) AS max_points FROM rankings; 16950.0
184
+ What hand does Roger Federer play with? SELECT hand FROM players WHERE name = 'Roger Federer'; R
185
+ What country is Rafael Nadal from? SELECT ioc FROM players WHERE name = 'Rafael Nadal'; ESP
186
+ What is Novak Djokovic’s height? SELECT height FROM players WHERE name = 'Novak Djokovic'; 188.0
187
+ What is the date of birth of Andy Murray? SELECT dob FROM players WHERE name = 'Andy Murray'; 19870515
188
+ Which hand does Pete Sampras use? SELECT hand FROM players WHERE name = 'Pete Sampras'; R
189
+ What nationality is Alexander Zverev? SELECT ioc FROM players WHERE name = 'Alexander Zverev'; GER
190
+ How tall is John McEnroe? SELECT height FROM players WHERE name = 'John McEnroe'; 180.0
191
+ What is Andre Agassi’s dominant playing hand? SELECT hand FROM players WHERE name = 'Andre Agassi'; R
192
+ What is the average height of all players? SELECT AVG(height) FROM players; 183.74813763746
193
+ Which country has the tallest average players? SELECT ioc, AVG(height) AS avg_height FROM players GROUP BY ioc ORDER BY avg_height DESC LIMIT 1; YUG|194.0
194
+ How many right handed players are there? SELECT COUNT(*) FROM players WHERE hand = 'R'; 15666
195
+ Which player is the tallest? SELECT name, height FROM players ORDER BY height DESC LIMIT 1; Reilly Opelka|211.0
196
+ How many American (USA) players are in the database? SELECT COUNT(*) FROM players WHERE ioc = 'USA'; 13102
197
+ List the names and heights of all players taller than 205 cm. SELECT name, height FROM players WHERE height > 205; Greg Neuhart|206.0 Ivo Karlovic|208.0 John Isner|206.0 Reilly Opelka|211.0
198
+ What country is Carlos Alcaraz from? SELECT ioc FROM players WHERE name = 'Carlos Alcaraz'; ESP
199
+ What is Jannik Sinner’s date of birth? SELECT dob FROM players WHERE name = 'Jannik Sinner'; 20010816.0
200
+ How many Spanish (ESP) players are there? SELECT COUNT(*) AS spanish_players FROM players WHERE ioc = 'ESP'; 3026
201
+ What is Nick Kyrgios’ height and dominant hand? SELECT height, hand FROM players WHERE name = 'Nick Kyrgios'; 193.0|R
202
+ What is the average height of right handed players? SELECT AVG(height) AS avg_height FROM players WHERE hand = 'R'; 183.806518151815
203
+ Who is the youngest player in the database? SELECT name, dob FROM players ORDER BY dob DESC LIMIT 1; Vito Antonio Darderi|20080113.0
204
+ How many players were born before 1980? SELECT COUNT(*) FROM players WHERE dob < 19800101; 15041
205
+ What is Taylor Fritz’s height and nationality? SELECT height, ioc FROM players WHERE name = 'Taylor Fritz'; 193.0|USA
utils/processing/combine_datasets.ipynb ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "01d11866",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Open NBA and tennis datasets"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 7,
14
+ "id": "155a7ecb",
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "Total NBA dataset examples: 600\n",
22
+ " natural_query \\\n",
23
+ "205 How many points did the home team score in the... \n",
24
+ "\n",
25
+ " sql_query result \n",
26
+ "205 SELECT pts_home FROM game WHERE game_id = (SEL... 122.0 \n",
27
+ "\n",
28
+ "\n",
29
+ "Total Tennis dataset examples: 204\n",
30
+ " natural_query \\\n",
31
+ "0 Get the full names of all players taller than ... \n",
32
+ "\n",
33
+ " sql_query result \n",
34
+ "0 SELECT name FROM players WHERE height > 210; Reilly|Opelka \n"
35
+ ]
36
+ },
37
+ {
38
+ "name": "stderr",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "C:\\Users\\Dean\\AppData\\Local\\Temp\\ipykernel_21248\\149351044.py:11: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
42
+ " nba_df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n",
43
+ "C:\\Users\\Dean\\AppData\\Local\\Temp\\ipykernel_21248\\149351044.py:12: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
44
+ " tennis_df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n"
45
+ ]
46
+ }
47
+ ],
48
+ "source": [
49
+ "import pandas as pd\n",
50
+ "import re\n",
51
+ "\n",
52
+ "SAMPLE_SIZE = 600\n",
53
+ "\n",
54
+ "# Open two datasets\n",
55
+ "nba_df = pd.read_csv(\"../../training-data/nba_train_set.tsv\", sep='\\t')\n",
56
+ "tennis_df = pd.read_csv(\"../../training-data/tennis_train_set.tsv\", sep='\\t')\n",
57
+ "\n",
58
+ "# Fix any spacing issues\n",
59
+ "nba_df = nba_df.map(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n",
60
+ "tennis_df = tennis_df.map(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n",
61
+ "\n",
62
+ "# Downsample NBA\n",
63
+ "nba_df = nba_df.sample(n=SAMPLE_SIZE)\n",
64
+ "\n",
65
+ "# Display dataset info\n",
66
+ "print(f\"Total NBA dataset examples: {len(nba_df)}\")\n",
67
+ "print(nba_df.head(1))\n",
68
+ "print()\n",
69
+ "print()\n",
70
+ "print(f\"Total Tennis dataset examples: {len(tennis_df)}\")\n",
71
+ "print(tennis_df.head(1))"
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "markdown",
76
+ "id": "eb357705",
77
+ "metadata": {},
78
+ "source": [
79
+ "# Combine into one TSV with an extra column indicating which set each example belongs to"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": 11,
85
+ "id": "b3acd217",
86
+ "metadata": {},
87
+ "outputs": [
88
+ {
89
+ "name": "stdout",
90
+ "output_type": "stream",
91
+ "text": [
92
+ "Saved combined dataset with 804 rows\n"
93
+ ]
94
+ }
95
+ ],
96
+ "source": [
97
+ "# Add \"is_nba\" indicator column\n",
98
+ "nba_df[\"is_nba\"] = True\n",
99
+ "tennis_df[\"is_nba\"] = False\n",
100
+ "\n",
101
+ "# Combine into single dataframe, then shuffle\n",
102
+ "combined_df = pd.concat([nba_df, tennis_df], ignore_index=True)\n",
103
+ "combined_df = combined_df.sample(frac=1).reset_index(drop=True)\n",
104
+ "\n",
105
+ "\n",
106
+ "# Save to combined TSV\n",
107
+ "combined_df.to_csv(\"../../training-data/combined_dataset.tsv\", sep=\"\\t\", index=False)\n",
108
+ "print(\"Saved combined dataset with\", len(combined_df), \"rows\")"
109
+ ]
110
+ }
111
+ ],
112
+ "metadata": {
113
+ "kernelspec": {
114
+ "display_name": "Python 3",
115
+ "language": "python",
116
+ "name": "python3"
117
+ },
118
+ "language_info": {
119
+ "codemirror_mode": {
120
+ "name": "ipython",
121
+ "version": 3
122
+ },
123
+ "file_extension": ".py",
124
+ "mimetype": "text/x-python",
125
+ "name": "python",
126
+ "nbconvert_exporter": "python",
127
+ "pygments_lexer": "ipython3",
128
+ "version": "3.12.6"
129
+ }
130
+ },
131
+ "nbformat": 4,
132
+ "nbformat_minor": 5
133
+ }