Spaces:

dvwn
/

nl2sql-api

Running

dvwn committed on May 13

Commit

dfa643b

1 Parent(s): f160d1e

Update evaluation mode

- Evaluate which taxonomy categories cause the low EX and ESM scores.
- Add a model registry and model-type routing to hf_engine.py.
- Add more models for testing.

Files changed (5) hide show

src/nl2sql/__pycache__/hf_engine.cpython-313.pyc +0 -0
src/nl2sql/hf_engine.py +48 -5
src/scripts/evaluation_mode.py +82 -42
src/scripts/taxonomy_report.py +49 -0
src/scripts/test_cases.json +30 -0

src/nl2sql/__pycache__/hf_engine.cpython-313.pyc CHANGED Viewed

Binary files a/src/nl2sql/__pycache__/hf_engine.cpython-313.pyc and b/src/nl2sql/__pycache__/hf_engine.cpython-313.pyc differ

src/nl2sql/hf_engine.py CHANGED Viewed

@@ -2,12 +2,24 @@
 # This module defines the HuggingFace-based engine for generating SQL queries from natural language questions.
 import os
 from huggingface_hub import InferenceClient
 from langchain_core.language_models.llms import LLM
 from typing import Any, List, Optional
 # Default Model
 # DEFAULT_MODEL_ID = "defog/llama-3-sqlcoder-8b:featherless-ai"
-DEFAULT_MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct:featherless-ai"
 # Custom LangChain wrapper for HuggingFace Inference API
 class HFChatWrapper(LLM):
@@ -33,8 +45,9 @@ class HFChatWrapper(LLM):
         return "huggingface_inference_client"
 # Initialize the HuggingFace endpoint using the InferenceClient
-def get_llm(model_id: str = DEFAULT_MODEL_ID):
     """
     Initializes the HuggingFace InferenceClient and returns an LLM instance for generating SQL queries.
     """
     # Load HuggingFace API token from environment variable
@@ -42,10 +55,40 @@ def get_llm(model_id: str = DEFAULT_MODEL_ID):
     if not hf_token:
         raise ValueError("HuggingFace API token not found!")
     print(f"Initializing HuggingFace InferenceClient with model: {model_id}")
     # Initialize the HuggingFace InferenceClient
-    client = InferenceClient(api_key=hf_token)
-    llm = HFChatWrapper(client=client, model_id=model_id)
-    return llm

 # This module defines the HuggingFace-based engine for generating SQL queries from natural language questions.
 import os
 from huggingface_hub import InferenceClient
+from langchain_huggingface import HuggingFaceEndpoint
 from langchain_core.language_models.llms import LLM
 from typing import Any, List, Optional
 # Default Model
 # DEFAULT_MODEL_ID = "defog/llama-3-sqlcoder-8b:featherless-ai"
+# DEFAULT_MODEL_ID = "defog/sqlcoder-7b-2"
+# DEFAULT_MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct:featherless-ai"
+# Model Registry: maps each model ID under test to the API style get_llm() uses for it.
+#   "chat" -> InferenceClient wrapped in HFChatWrapper
+#   "text" -> HuggingFaceEndpoint with task="text-generation"
+# Model IDs missing from this registry are treated as "chat" by get_llm().
+MODEL_REGISTRY = {
+    "defog/sqlcoder-7b-2": "text",
+    "Qwen/Qwen2.5-Coder-7B-Instruct:featherless-ai": "chat",
+    "Qwen/Qwen2.5-Coder-32B-Instruct:featherless-ai": "chat",
+    "defog/llama-3-sqlcoder-8b:featherless-ai": "chat"
+    #"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B:featherless-ai": "chat"
+}
+# Default model ID used by get_llm() when the caller does not pass one.
+ACTIVE_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct:featherless-ai"
 # Custom LangChain wrapper for HuggingFace Inference API
 class HFChatWrapper(LLM):
         return "huggingface_inference_client"
 # Initialize the HuggingFace endpoint using the InferenceClient
+def get_llm(model_id: str = ACTIVE_MODEL_ID):
     """
+    Automatically detects the model type and returns the correct LangChain interface.
     Initializes the HuggingFace InferenceClient and returns an LLM instance for generating SQL queries.
     """
     # Load HuggingFace API token from environment variable
     if not hf_token:
         raise ValueError("HuggingFace API token not found!")
+    model_type = MODEL_REGISTRY.get(model_id, "chat")
     print(f"Initializing HuggingFace InferenceClient with model: {model_id}")
+    if model_type == "chat":
+        client = InferenceClient(api_key=hf_token)
+        return HFChatWrapper(client=client, model_id=model_id)
+    elif model_type == "text":
+        # Route to standard Text Generation API
+        return HuggingFaceEndpoint(
+            repo_id=model_id,
+            task="text-generation",
+            max_new_tokens=512,
+            temperature=0.0,
+            huggingfacehub_api_token=hf_token,
+            do_sample=False,
+            return_full_text=False
+        )
+    else:
+        raise ValueError(f"Unknown model type: {model_type}")
     # Initialize the HuggingFace InferenceClient
+    #client = InferenceClient(api_key=hf_token)
+    #llm = HFChatWrapper(client=client, model_id=model_id)
+    #return llm
+# Manual smoke test: run this module directly to build the default LLM and send it one prompt.
+if __name__=="__main__":
+    # Imported here so that dotenv is only needed when the module is run as a script.
+    from dotenv import load_dotenv
+    # Load variables from a .env file into the environment
+    # (presumably where the HuggingFace API token is kept - verify).
+    load_dotenv()
+    try:
+        test_llm = get_llm()
+        print("Model loaded successfully! Running a quick ping...")
+        response = test_llm.invoke("write a single SQL statement to count all rows in a table name 'Employee'.")
+        print(f"\nResponse:\n{response}")
+    # NOTE(review): this handler also catches failures raised by invoke(),
+    # even though the message below only mentions initialization.
+    except Exception as e:
+        print(f"Error during LLM initialization: {e}")

src/scripts/evaluation_mode.py CHANGED Viewed

@@ -1,44 +1,75 @@
 # Path: src/scripts/evaluation_mode.py
 # Evaluation script for Hugging Face SQL generation.
 import json
 from pathlib import Path
 import pandas as pd
 from src.database.db_manager import get_db_connection
 from src.nl2sql.sql_agent import nl2sql_agent
 TEST_CASES_PATH = Path("src/scripts/test_cases.json")
 RESULTS_PATH = Path("hf_evaluation_results.json")
 def _normalize_dataframe(dataframe: pd.DataFrame) -> pd.DataFrame:
     # Normalize dataframe to ensure accurate comparison
     normalized = dataframe.copy()
-    normalized.columns = [str(column).lower() for column in normalized.columns]
     for column in normalized.columns:
         normalized[column] = normalized[column].map(
             lambda value: round(float(value), 6)
-            if isinstance(value, float)
             else value
         )
     sort_columns = list(normalized.columns)
     if sort_columns:
-        normalized = normalized.sort_values(by=sort_columns, kind="mergesort").reset_index(drop=True)
     return normalized
-# Compare generated SQL results with expected results
-def compare_results(df_generated: pd.DataFrame, df_gold: pd.DataFrame) -> bool:
-    """Compare generated and expected query results."""
     if df_generated is None or df_gold is None:
         return False
     try:
         normalized_generated = _normalize_dataframe(df_generated)
         normalized_gold = _normalize_dataframe(df_gold)
-        return normalized_generated.equals(normalized_gold)
     except Exception as error:
-        print(f"Error comparing results: {error}")
         return False
 def run_evaluation():
@@ -50,58 +81,67 @@ def run_evaluation():
         test_cases = json.load(handle)
     results = []
-    correct_count = 0
     print(f"Running evaluation on {len(test_cases)} test cases...\n")
     for case in test_cases:
-        question = case["question"]
-        print(f"Testing ID {case['id']}: {question[:50]}...")
         # Implement agent to handle RAG retrieval and SQL generation
         agent_response = nl2sql_agent(user_question=question)
         generated_sql = agent_response.get("query", "")
         connection = get_db_connection()
         if connection is None:
             raise RuntimeError("Unable to connect to the SQLite database.")
         try:
             df_generated = pd.read_sql_query(generated_sql, connection)
-            df_gold = pd.read_sql_query(case["gold_sql"], connection)
-            is_correct = compare_results(df_generated, df_gold)
-            if is_correct:
-                correct_count += 1
-            results.append(
-                {
-                    "id": case["id"],
-                    "question": question,
-                    "status": "PASS" if is_correct else "FAIL",
-                    "generated_sql": generated_sql,
-                    "gold_sql": case["gold_sql"],
-                }
-            )
         except Exception as error:
-            results.append(
-                {
-                    "id": case["id"],
-                    "question": question,
-                    "status": "ERROR",
-                    "generated_sql": generated_sql,
-                    "gold_sql": case["gold_sql"],
-                    "error": str(error),
-                }
-            )
         finally:
             connection.close()
-    accuracy = (correct_count / len(test_cases)) * 100 if test_cases else 0.0
-    print("\nEVALUATION COMPLETE")
-    print(f"Total Test Cases: {len(test_cases)}")
-    print(f"Correctly Generated SQL: {correct_count} / {len(test_cases)}")
-    print(f"Execution Accuracy: {accuracy:.2f}%")
     with RESULTS_PATH.open("w", encoding="utf-8") as handle:
-        json.dump(results, handle, indent=4)

 # Path: src/scripts/evaluation_mode.py
 # Evaluation script for Hugging Face SQL generation.
 import json
+import sqlglot
 from pathlib import Path
 import pandas as pd
 from src.database.db_manager import get_db_connection
 from src.nl2sql.sql_agent import nl2sql_agent
+from src.scripts.taxonomy_report import print_taxonomyReport
 TEST_CASES_PATH = Path("src/scripts/test_cases.json")
 RESULTS_PATH = Path("hf_evaluation_results.json")
 def _normalize_dataframe(dataframe: pd.DataFrame) -> pd.DataFrame:
     # Normalize dataframe to ensure accurate comparison
+    """
+    Standardize a dataframe for Execution Accuracy (EX) comparison.
+
+    - Works on a copy; the input dataframe is not modified.
+    - Converts float and int values to floats rounded to 6 decimal places.
+    - Ensures order agnosticism by sorting rows on all columns and resetting the index.
+    - Leaves column names untouched, so a caller can compare values only.
+
+    Returns the normalized copy.
+    """
     normalized = dataframe.copy()
+    # Column-name lowercasing is disabled: names are no longer normalized here.
+    #normalized.columns = [str(column).lower() for column in normalized.columns]
     for column in normalized.columns:
         normalized[column] = normalized[column].map(
             lambda value: round(float(value), 6)
+            if isinstance(value, (float, int))
             else value
         )
     sort_columns = list(normalized.columns)
     if sort_columns:
+        # Sort on every column, left to right, so row order does not affect the comparison.
+        normalized = normalized.sort_values(by=sort_columns).reset_index(drop=True)
     return normalized
+# EX: Compare generated SQL results with expected results
+def calculate_ex(df_generated: pd.DataFrame, df_gold: pd.DataFrame) -> bool:
+    """
+    Execution Accuracy (EX): compare generated SQL results with expected results.
+
+    Both dataframes are passed through _normalize_dataframe and then compared
+    element by element.
+    - Column name agnostic: uses .values, so header differences are ignored.
+
+    Returns True only when both results have the same shape and every value
+    matches. Returns False if either input is None, if the shapes differ, or if
+    normalization or comparison raises (the error is printed).
+    """
     if df_generated is None or df_gold is None:
         return False
     try:
         normalized_generated = _normalize_dataframe(df_generated)
         normalized_gold = _normalize_dataframe(df_gold)
+        # Check shapes first so the elementwise comparison below is well defined.
+        if normalized_generated.shape != normalized_gold.shape:
+            return False
+        # NOTE(review): elementwise == treats NaN as unequal to NaN, so identical
+        # results containing NULLs in numeric columns may be reported as a
+        # mismatch - confirm.
+        return bool((normalized_generated.values == normalized_gold.values).all())
+        # return normalized_generated.equals(normalized_gold)
+    except Exception as error:
+        print(f"EX Evaluation Error: {error}")
+        return False
+def calculate_esm(generated_sql: str, gold_sql: str) -> bool:
+    """
+    Exact Set Match (ESM): compare the parsed structure of two SQL statements.
+
+    Both statements are parsed with sqlglot.parse_one (read=None, i.e. no
+    dialect is specified) and the resulting expressions are compared with ==.
+
+    Returns True when the expressions compare equal. Returns False when they
+    differ or when parsing/comparison raises (the error is printed).
+
+    NOTE(review): presumably this makes the check insensitive to whitespace and
+    formatting; how much capitalization or syntactic variation is tolerated
+    depends on sqlglot's expression equality - confirm.
+    """
+    try:
+        # Parse both SQL queries into expressions
+        generated_exp = sqlglot.parse_one(generated_sql, read=None)
+        gold_exp = sqlglot.parse_one(gold_sql, read=None)
+        # Compare the expressions for structural equivalence
+        return generated_exp == gold_exp
     except Exception as error:
+        print(f"ESM Evaluation Error: {error}")
         return False
 def run_evaluation():
         test_cases = json.load(handle)
     results = []
+    ex_count = 0
+    esm_count = 0
     print(f"Running evaluation on {len(test_cases)} test cases...\n")
     for case in test_cases:
+        id = case.get("id")
+        question = case.get("question")
+        gold_sql = case.get("gold_sql")
+        taxonomy = case.get("taxonomy", "Unknown")
+        # print(f"Testing ID {id}: {question[:50]}...")
         # Implement agent to handle RAG retrieval and SQL generation
         agent_response = nl2sql_agent(user_question=question)
         generated_sql = agent_response.get("query", "")
+        # ESM Evaluation
+        esm_result = calculate_esm(generated_sql, gold_sql)
+        if esm_result:
+            esm_count += 1
+        # EX Evaluation
+        ex_result = False
         connection = get_db_connection()
         if connection is None:
             raise RuntimeError("Unable to connect to the SQLite database.")
         try:
             df_generated = pd.read_sql_query(generated_sql, connection)
+            df_gold = pd.read_sql_query(gold_sql, connection)
+            ex_result = calculate_ex(df_generated, df_gold)
+            if ex_result:
+                ex_count += 1
         except Exception as error:
+            print(f"Error executing SQL for ID {id}: {error}")
         finally:
             connection.close()
+        results.append({
+            "id": id,
+            "question": question,
+            "taxonomy": taxonomy,
+            "ex_pass": ex_result,
+            "esm_pass": esm_result,
+            "generated_sql": generated_sql,
+            "gold_sql": gold_sql
+        })
+    # Summary Statistics
+    total = len(test_cases)
+    ex_accuracy = (ex_count / total) * 100 if total > 0 else 0
+    esm_accuracy = (esm_count / total) * 100 if total > 0 else 0
+    print("\nEVALUATION SUMMARY")
+    print("-" * 40)
+    print(f"Total Test Cases: {total}")
+    print(f"Execution Accuracy (EX): {ex_accuracy:.2f}% ({ex_count}/{total})")
+    print(f"Exact Set Match (ESM): {esm_accuracy:.2f}% ({esm_count}/{total})")
     with RESULTS_PATH.open("w", encoding="utf-8") as handle:
+        json.dump(results, handle, indent=4)
+    print_taxonomyReport(results)

src/scripts/taxonomy_report.py ADDED Viewed

	@@ -0,0 +1,49 @@

+# Path: src/scripts/taxonomy_report.py
+# Generate a taxonomy report to identify which taxonomy tags the model struggles with
+import json
+import pandas as pd
+from pathlib import Path
+def print_taxonomyReport(results_data):
+    """
+    Print a per-taxonomy-tag breakdown of evaluation results.
+
+    Args:
+        results_data: List of result dictionaries. Each entry is read for the
+            keys 'id', 'taxonomy', 'ex_pass' and 'esm_pass'. If it is empty or
+            None, results are loaded from "hf_evaluation_results.json" in the
+            current working directory instead.
+
+    Returns:
+        None. The report is printed to stdout. If the fallback file does not
+        exist a message is printed; if the loaded data is empty the function
+        returns without printing a report.
+    """
+    if not results_data:
+        # Nothing passed in memory: fall back to the results file on disk.
+        results_path = Path("hf_evaluation_results.json")
+        if results_path.exists():
+            with open(results_path, "r", encoding="utf-8") as f:
+                results_data = json.load(f)
+        else:
+            print("No data provided and results file not found.")
+            return
+    # The file may exist but contain no results.
+    if not results_data:
+        return
+    df = pd.DataFrame(results_data)
+    # Missing taxonomy values are reported under the tag "Unknown".
+    df['taxonomy'] = df['taxonomy'].fillna("Unknown").astype(str)
+    # Tags are stored as one ", "-separated string; split and explode so that a
+    # case with several tags is counted once under each of its tags.
+    df['taxonomy'] = df['taxonomy'].str.split(', ')
+    df_exploded = df.explode('taxonomy')
+    # Calculate accuracy per taxonomy tag
+    taxonomy_summary = df_exploded.groupby('taxonomy').agg(
+        total_cases = ('id', 'count'),
+        ex_passed = ('ex_pass', 'sum'),
+        esm_passed = ('esm_pass', 'sum')
+    )
+    # Pass rates as percentages of the cases carrying each tag.
+    taxonomy_summary['ex_acc'] = (taxonomy_summary['ex_passed'] / taxonomy_summary['total_cases']) * 100
+    taxonomy_summary['esm_acc'] = (taxonomy_summary['esm_passed'] / taxonomy_summary['total_cases']) * 100
+    print("\n" + "="*50)
+    print("TAXONOMY PERFORMANCE REPORT SUMMARY")
+    print("-"*50)
+    # Sort by execution accuracy, best-performing tags first
+    final_report = taxonomy_summary.sort_values(by='ex_acc', ascending=False)
+    print(final_report.to_string())
+# Manual entry point: passing None makes print_taxonomyReport load the saved
+# results from "hf_evaluation_results.json" instead of using in-memory data.
+if __name__ == "__main__":
+    print_taxonomyReport(None)

src/scripts/test_cases.json CHANGED Viewed

@@ -1,76 +1,106 @@
 [
   {
     "id": 1,
     "question": "List all the artists name in the database.",
     "gold_sql": "SELECT Name FROM Artist;"
   },
   {
     "id": 2,
     "question": "How many genres are there?",
     "gold_sql": "SELECT COUNT(*) FROM Genre;"
   },
   {
     "id": 3,
     "question": "List the names of the first 5 tracks.",
     "gold_sql": "SELECT Name FROM Track LIMIT 5;"
   },
   {
     "id": 4,
     "question": "Count the number of customers located in the USA.",
     "gold_sql": "SELECT COUNT(*) FROM Customer WHERE Country = 'USA';"
   },
   {
     "id": 5,
     "question": "Find all invoices for the customer with ID 1.",
     "gold_sql": "SELECT * FROM Invoice WHERE CustomerId = 1;"
   },
   {
     "id": 6,
     "question": "List each album title along with the artist's name.",
     "gold_sql": "SELECT Album.Title, Artist.Name FROM Album JOIN Artist ON Album.ArtistId = Artist.ArtistId;"
   },
   {
     "id": 7,
     "question": "How many tracks belong to the 'Rock' genre?",
     "gold_sql": "SELECT COUNT(*) FROM Track JOIN Genre ON Track.GenreId = Genre.GenreId WHERE Genre.Name = 'Rock';"
   },
   {
     "id": 8,
     "question": "Show the total revenue generated from each country.",
     "gold_sql": "SELECT BillingCountry, SUM(Total) FROM Invoice GROUP BY BillingCountry;"
   },
   {
     "id": 9,
     "question": "Find the total number of items sold for each media type.",
     "gold_sql": "SELECT MediaType.Name, SUM(InvoiceLine.Quantity) FROM InvoiceLine JOIN Track ON InvoiceLine.TrackId = Track.TrackId JOIN MediaType ON Track.MediaTypeId = MediaType.MediaTypeId GROUP BY MediaType.Name;"
   },
   {
     "id": 10,
     "question": "List the first and last names of all employees who are Sales Support Agents.",
     "gold_sql": "SELECT FirstName, LastName FROM Employee WHERE Title = 'Sales Support Agent';"
   },
   {
     "id": 11,
     "question": "List the top 5 customers who have spent the most money in total.",
     "gold_sql": "SELECT c.FirstName, c.LastName, SUM(i.Total) as TotalSpent FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.CustomerId ORDER BY TotalSpent DESC LIMIT 5;"
   },
   {
     "id": 12,
     "question": "Which artist has the most tracks in the database? Give the name and count.",
     "gold_sql": "SELECT ar.Name, COUNT(t.TrackId) as TrackCount FROM Artist ar JOIN Album al ON ar.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId GROUP BY ar.ArtistId ORDER BY TrackCount DESC LIMIT 1;"
   },
   {
     "id": 13,
     "question": "Which genres have more than 100 tracks? List the genre name and count.",
     "gold_sql": "SELECT g.Name, COUNT(t.TrackId) as TrackCount FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.GenreId HAVING TrackCount > 100;"
   },
   {
     "id": 14,
     "question": "Calculate the average track length in seconds for each genre.",
     "gold_sql": "SELECT g.Name, AVG(t.Milliseconds) / 1000.0 as AvgSeconds FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.GenreId;"
   },
   {
     "id": 15,
     "question": "Identify the artist who has earned the most revenue from customers in Canada.",
     "gold_sql": "SELECT ar.Name, SUM(il.UnitPrice * il.Quantity) AS Revenue FROM Artist ar JOIN Album al ON ar.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId JOIN InvoiceLine il ON t.TrackId = il.TrackId JOIN Invoice i ON il.InvoiceId = i.InvoiceId WHERE i.BillingCountry = 'Canada' GROUP BY ar.ArtistId ORDER BY Revenue DESC LIMIT 1;"
   }

 [
   {
     "id": 1,
+    "difficulty": "easy",
+    "taxonomy": "Selection",
     "question": "List all the artists name in the database.",
     "gold_sql": "SELECT Name FROM Artist;"
   },
   {
     "id": 2,
+    "difficulty": "easy",
+    "taxonomy": "Aggregation",
     "question": "How many genres are there?",
     "gold_sql": "SELECT COUNT(*) FROM Genre;"
   },
   {
     "id": 3,
+    "difficulty": "easy",
+    "taxonomy": "Selection, Limit",
     "question": "List the names of the first 5 tracks.",
     "gold_sql": "SELECT Name FROM Track LIMIT 5;"
   },
   {
     "id": 4,
+    "difficulty": "easy",
+    "taxonomy": "Aggregation, Filtering",
     "question": "Count the number of customers located in the USA.",
     "gold_sql": "SELECT COUNT(*) FROM Customer WHERE Country = 'USA';"
   },
   {
     "id": 5,
+    "difficulty": "easy",
+    "taxonomy": "Selection, Filtering",
     "question": "Find all invoices for the customer with ID 1.",
     "gold_sql": "SELECT * FROM Invoice WHERE CustomerId = 1;"
   },
   {
     "id": 6,
+    "difficulty": "medium",
+    "taxonomy": "Simple Join",
     "question": "List each album title along with the artist's name.",
     "gold_sql": "SELECT Album.Title, Artist.Name FROM Album JOIN Artist ON Album.ArtistId = Artist.ArtistId;"
   },
   {
     "id": 7,
+    "difficulty": "medium",
+    "taxonomy": "Simple Join, Filtering, Aggregation",
     "question": "How many tracks belong to the 'Rock' genre?",
     "gold_sql": "SELECT COUNT(*) FROM Track JOIN Genre ON Track.GenreId = Genre.GenreId WHERE Genre.Name = 'Rock';"
   },
   {
     "id": 8,
+    "difficulty": "medium",
+    "taxonomy": "Aggregation, Grouping",
     "question": "Show the total revenue generated from each country.",
     "gold_sql": "SELECT BillingCountry, SUM(Total) FROM Invoice GROUP BY BillingCountry;"
   },
   {
     "id": 9,
+    "difficulty": "medium",
+    "taxonomy": "Multi-Join, Aggregation, Grouping",
     "question": "Find the total number of items sold for each media type.",
     "gold_sql": "SELECT MediaType.Name, SUM(InvoiceLine.Quantity) FROM InvoiceLine JOIN Track ON InvoiceLine.TrackId = Track.TrackId JOIN MediaType ON Track.MediaTypeId = MediaType.MediaTypeId GROUP BY MediaType.Name;"
   },
   {
     "id": 10,
+    "difficulty": "easy",
+    "taxonomy": "Selection, Filtering",
     "question": "List the first and last names of all employees who are Sales Support Agents.",
     "gold_sql": "SELECT FirstName, LastName FROM Employee WHERE Title = 'Sales Support Agent';"
   },
   {
     "id": 11,
+    "difficulty": "medium",
+    "taxonomy": "Simple Join, Aggregation, Grouping, Ordering, Limit",
     "question": "List the top 5 customers who have spent the most money in total.",
     "gold_sql": "SELECT c.FirstName, c.LastName, SUM(i.Total) as TotalSpent FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.CustomerId ORDER BY TotalSpent DESC LIMIT 5;"
   },
   {
     "id": 12,
+    "difficulty": "hard",
+    "taxonomy": "Multi-Join, Aggregation, Grouping, Ordering, Limit",
     "question": "Which artist has the most tracks in the database? Give the name and count.",
     "gold_sql": "SELECT ar.Name, COUNT(t.TrackId) as TrackCount FROM Artist ar JOIN Album al ON ar.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId GROUP BY ar.ArtistId ORDER BY TrackCount DESC LIMIT 1;"
   },
   {
     "id": 13,
+    "difficulty": "medium",
+    "taxonomy": "Simple Join, Aggregation, Grouping, Having",
     "question": "Which genres have more than 100 tracks? List the genre name and count.",
     "gold_sql": "SELECT g.Name, COUNT(t.TrackId) as TrackCount FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.GenreId HAVING TrackCount > 100;"
   },
   {
     "id": 14,
+    "difficulty": "medium",
+    "taxonomy": "Simple Join, Aggregation, Arithmetic, Grouping",
     "question": "Calculate the average track length in seconds for each genre.",
     "gold_sql": "SELECT g.Name, AVG(t.Milliseconds) / 1000.0 as AvgSeconds FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.GenreId;"
   },
   {
     "id": 15,
+    "difficulty": "hard",
+    "taxonomy": "Multi-Join, Aggregation, Grouping, Ordering, Limit",
     "question": "Identify the artist who has earned the most revenue from customers in Canada.",
     "gold_sql": "SELECT ar.Name, SUM(il.UnitPrice * il.Quantity) AS Revenue FROM Artist ar JOIN Album al ON ar.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId JOIN InvoiceLine il ON t.TrackId = il.TrackId JOIN Invoice i ON il.InvoiceId = i.InvoiceId WHERE i.BillingCountry = 'Canada' GROUP BY ar.ArtistId ORDER BY Revenue DESC LIMIT 1;"
   }