Spaces:

dvwn
/

nl2sql-api

Sleeping

+[
+    {
+        "id": 1,
+        "question": "List all the artists name in the database.",
+        "status": "PASS",
+        "generated_sql": "SELECT a.Name FROM Artist a;",
+        "gold_sql": "SELECT Name FROM Artist;"
+    },
+    {
+        "id": 2,
+        "question": "How many genres are there?",
+        "status": "FAIL",
+        "generated_sql": "SELECT COUNT(DISTINCT g.GenreId) FROM Genre g;",
+        "gold_sql": "SELECT COUNT(*) FROM Genre;"
+    },
+    {
+        "id": 3,
+        "question": "List the names of the first 5 tracks.",
+        "status": "PASS",
+        "generated_sql": "SELECT t.Name FROM Track t ORDER BY t.TrackId LIMIT 5;",
+        "gold_sql": "SELECT Name FROM Track LIMIT 5;"
+    },
+    {
+        "id": 4,
+        "question": "Count the number of customers located in the USA.",
+        "status": "FAIL",
+        "generated_sql": "SELECT COUNT(DISTINCT c.CustomerId) FROM Customer c WHERE c.Country = 'USA';",
+        "gold_sql": "SELECT COUNT(*) FROM Customer WHERE Country = 'USA';"
+    },
+    {
+        "id": 5,
+        "question": "Find all invoices for the customer with ID 1.",
+        "status": "FAIL",
+        "generated_sql": "SELECT i.InvoiceId, i.InvoiceDate, i.Total FROM Invoice i JOIN Customer c ON i.CustomerId = c.CustomerId WHERE c.CustomerId = 1;",
+        "gold_sql": "SELECT * FROM Invoice WHERE CustomerId = 1;"
+    },
+    {
+        "id": 6,
+        "question": "List each album title along with the artist's name.",
+        "status": "PASS",
+        "generated_sql": "SELECT a.Title, ar.Name FROM Album a JOIN Artist ar ON a.ArtistId = ar.ArtistId;",
+        "gold_sql": "SELECT Album.Title, Artist.Name FROM Album JOIN Artist ON Album.ArtistId = Artist.ArtistId;"
+    },
+    {
+        "id": 7,
+        "question": "How many tracks belong to the 'Rock' genre?",
+        "status": "FAIL",
+        "generated_sql": "SELECT COUNT(t.TrackId) FROM Track t JOIN Genre g ON t.GenreId = g.GenreId WHERE LOWER(g.Name) = 'rock';",
+        "gold_sql": "SELECT COUNT(*) FROM Track JOIN Genre ON Track.GenreId = Genre.GenreId WHERE Genre.Name = 'Rock';"
+    },
+    {
+        "id": 8,
+        "question": "Show the total revenue generated from each country.",
+        "status": "FAIL",
+        "generated_sql": "SELECT i.BillingCountry, SUM(i.Total) AS TotalRevenue FROM Invoice i GROUP BY i.BillingCountry ORDER BY TotalRevenue DESC NULLS LAST;",
+        "gold_sql": "SELECT BillingCountry, SUM(Total) FROM Invoice GROUP BY BillingCountry;"
+    },
+    {
+        "id": 9,
+        "question": "Find the total number of items sold for each media type.",
+        "status": "FAIL",
+        "generated_sql": "SELECT mt.Name AS MediaType, SUM(il.Quantity) AS TotalQuantity FROM InvoiceLine il JOIN Track t ON il.TrackId = t.TrackId JOIN MediaType mt ON t.MediaTypeId = mt.MediaTypeId GROUP BY mt.Name ORDER BY TotalQuantity DESC NULLS LAST;",
+        "gold_sql": "SELECT MediaType.Name, COUNT(InvoiceLine.TrackId) FROM InvoiceLine JOIN Track ON InvoiceLine.TrackId = Track.TrackId JOIN MediaType ON Track.MediaTypeId = MediaType.MediaTypeId GROUP BY MediaType.Name;"
+    },
+    {
+        "id": 10,
+        "question": "List the first and last names of all employees who are Sales Support Agents.",
+        "status": "PASS",
+        "generated_sql": "SELECT e.FirstName, e.LastName FROM Employee e WHERE e.Title = 'Sales Support Agent';",
+        "gold_sql": "SELECT FirstName, LastName FROM Employee WHERE Title = 'Sales Support Agent';"
+    },
+    {
+        "id": 11,
+        "question": "List the top 5 customers who have spent the most money in total.",
+        "status": "FAIL",
+        "generated_sql": "SELECT c.CustomerId, SUM(i.Total) AS total_spent FROM Invoice i JOIN Customer c ON i.CustomerId = c.CustomerId GROUP BY c.CustomerId ORDER BY total_spent DESC LIMIT 5;",
+        "gold_sql": "SELECT c.FirstName, c.LastName, SUM(i.Total) as TotalSpent FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.CustomerId ORDER BY TotalSpent DESC LIMIT 5;"
+    },
+    {
+        "id": 12,
+        "question": "Which artist has the most tracks in the database? Give the name and count.",
+        "status": "ERROR",
+        "generated_sql": "SELECT a.Name, COUNT(t.TrackId) AS track_count FROM Track t JOIN Album a ON t.AlbumId = a.AlbumId GROUP BY a.Name ORDER BY track_count DESC LIMIT 1;",
+        "gold_sql": "SELECT ar.Name, COUNT(t.TrackId) as TrackCount FROM Artist ar JOIN Album al ON ar.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId GROUP BY ar.ArtistId ORDER BY TrackCount DESC LIMIT 1;",
+        "error": "Execution failed on sql 'SELECT a.Name, COUNT(t.TrackId) AS track_count FROM Track t JOIN Album a ON t.AlbumId = a.AlbumId GROUP BY a.Name ORDER BY track_count DESC LIMIT 1;': no such column: a.Name"
+    },
+    {
+        "id": 13,
+        "question": "Which genres have more than 100 tracks? List the genre name and count.",
+        "status": "FAIL",
+        "generated_sql": "SELECT g.Name, COUNT(t.TrackId) AS track_count FROM Track t JOIN Genre g ON t.GenreId = g.GenreId GROUP BY g.Name HAVING COUNT(t.TrackId) > 100 ORDER BY track_count DESC NULLS LAST;",
+        "gold_sql": "SELECT g.Name, COUNT(t.TrackId) as TrackCount FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.GenreId HAVING TrackCount > 100;"
+    },
+    {
+        "id": 14,
+        "question": "Calculate the average track length in seconds for each genre.",
+        "status": "FAIL",
+        "generated_sql": "SELECT g.Name, AVG(t.Milliseconds) AS average_length FROM Track t JOIN Genre g ON t.GenreId = g.GenreId GROUP BY g.Name ORDER BY average_length NULLS LAST;",
+        "gold_sql": "SELECT g.Name, AVG(t.Milliseconds) / 1000.0 as AvgSeconds FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.GenreId;"
+    },
+    {
+        "id": 15,
+        "question": "Identify the artist who has earned the most revenue from customers in Canada.",
+        "status": "ERROR",
+        "generated_sql": "SELECT a.Name, SUM(i.Total) AS TotalRevenue FROM Invoice i JOIN Customer c ON i.CustomerId = c.CustomerId JOIN Album a ON c.SupportRepId = a.ArtistId WHERE c.Country = 'Canada' GROUP BY a.Name ORDER BY TotalRevenue DESC LIMIT 1;",
+        "gold_sql": "SELECT ar.Name, SUM(il.UnitPrice * il.Quantity) AS Revenue FROM Artist ar JOIN Album al ON ar.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId JOIN InvoiceLine il ON t.TrackId = il.TrackId JOIN Invoice i ON il.InvoiceId = i.InvoiceId WHERE i.BillingCountry = 'Canada' GROUP BY ar.ArtistId ORDER BY Revenue DESC LIMIT 1;",
+        "error": "Execution failed on sql 'SELECT a.Name, SUM(i.Total) AS TotalRevenue FROM Invoice i JOIN Customer c ON i.CustomerId = c.CustomerId JOIN Album a ON c.SupportRepId = a.ArtistId WHERE c.Country = 'Canada' GROUP BY a.Name ORDER BY TotalRevenue DESC LIMIT 1;': no such column: a.Name"
+    }
+]

hf_test_bench.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# Test the Hugging Face inference
+from src.nl2sql.hf_engine import generate_sql
+from src.database.db_manager import get_db_connection, get_schema_context
+import pandas as pd
+def test_single_query():
+    print("Initializing Featherless AI SQL generation test...")
+    # Fetch the database schema context (ddl) from Chinook
+    ddl = get_schema_context
+    question = "Identify the artist who has earned the most revenue from customers in Canada."
+    try:
+        generated_sql = generate_sql(question, ddl)
+        print(f"\nGenerated SQL:\n{generated_sql}\n")
+        # Connect to the database and execute the generated SQL
+        connection = get_db_connection()
+        df = pd.read_sql_query(generated_sql, connection)
+        connection.close()
+        print("\nDatabase Query Result:")
+        print(df)
+        print("\nTest completed successfully: API connected and SQL is valid.")
+    except Exception as e:
+        print(f"\nTest failed: {e}")
+if __name__ == "__main__":
+    test_single_query()

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ

src/database/__pycache__/db_manager.cpython-313.pyc CHANGED Viewed

Binary files a/src/database/__pycache__/db_manager.cpython-313.pyc and b/src/database/__pycache__/db_manager.cpython-313.pyc differ

src/database/db_manager.py CHANGED Viewed

@@ -1,48 +1,234 @@
-# This module provides a function to establish a connection to the SQLite database used in the NL2SQL project. It also includes a test block to verify the connection and list the tables in the database.
-import sqlite3
 import os
-# Get the path to the database file
-DB_PATH = os.path.join(os.path.dirname(__file__), 'Chinook_Sqlite.sqlite')
 def get_db_connection():
-    """Establishes a connection to the SQLite database."""
     try:
         connection = sqlite3.connect(DB_PATH)
         return connection
-    except sqlite3.Error as e:
-        print(f"Error connecting to database: {e}")
         return None
-# Test the database connection
 if __name__ == "__main__":
     connection = get_db_connection()
     if connection:
         print("Database connection successful!")
         cursor = connection.cursor()
         cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
-        print("Tables in the database:", cursor.fetchall())
         connection.close()
     else:
         print("Failed to connect to the database.")
-# Extract Schema Information for LLM Prompts
-def get_schema_context():
-    """Extracts the database schema information to be used in LLM prompts."""
-    connection = get_db_connection()
-    if not connection:
-        return "Unable to connect to the database to retrieve schema information."
-    cursor = connection.cursor()
-    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
-    tables = [t[0] for t in cursor.fetchall() if not t[0].startswith('sqlite_')]
-    schema_text = ""
-    for table in tables:
-        cursor.execute(f"PRAGMA table_info({table});")
-        columns = [f"{c[1]} ({c[2]})" for c in cursor.fetchall()]
-        schema_text += f"Table {table}: {', '.join(columns)}\n"
-    connection.close()
-    return schema_text

+# Database helpers for the NL2SQL project.
 import os
+import re
+import sqlite3
+from typing import Dict, List
+DB_PATH = os.path.join(os.path.dirname(__file__), "Chinook_Sqlite.sqlite")
+STOPWORDS = {
+    "a",
+    "all",
+    "an",
+    "and",
+    "are",
+    "as",
+    "at",
+    "by",
+    "count",
+    "each",
+    "find",
+    "for",
+    "from",
+    "give",
+    "has",
+    "have",
+    "how",
+    "in",
+    "is",
+    "list",
+    "many",
+    "most",
+    "name",
+    "names",
+    "of",
+    "on",
+    "show",
+    "the",
+    "their",
+    "there",
+    "to",
+    "total",
+    "what",
+    "which",
+    "who",
+    "with",
+}
 def get_db_connection():
+    """Establish a connection to the SQLite database.
+    Opens the file at ``DB_PATH`` and sets ``row_factory`` to ``sqlite3.Row``.
+    Returns the open connection, or ``None`` after printing the error when
+    ``sqlite3.Error`` is raised. Callers must check for ``None`` and close the
+    connection themselves.
+    """
     try:
         connection = sqlite3.connect(DB_PATH)
+        # sqlite3.Row lets callers read columns by name (row["name"]) or by index.
+        connection.row_factory = sqlite3.Row
         return connection
+    except sqlite3.Error as error:
+        print(f"Error connecting to database: {error}")
         return None
+def _tokenize(text: str) -> set[str]:
+    tokens = re.findall(r"[A-Za-z0-9]+", text.lower())
+    return {token for token in tokens if token not in STOPWORDS}
+def _quote_identifier(identifier: str) -> str:
+    escaped_identifier = identifier.replace('"', '""')
+    return f'"{escaped_identifier}"'
+def _load_schema_metadata(connection: sqlite3.Connection) -> Dict[str, Dict[str, object]]:
+    cursor = connection.cursor()
+    cursor.execute(
+        """
+        SELECT name, sql
+        FROM sqlite_master
+        WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
+        ORDER BY name
+        """
+    )
+    metadata: Dict[str, Dict[str, object]] = {}
+    for row in cursor.fetchall():
+        table_name = row["name"]
+        quoted_table = _quote_identifier(table_name)
+        columns = cursor.execute(f"PRAGMA table_info({quoted_table})").fetchall()
+        foreign_keys = cursor.execute(f"PRAGMA foreign_key_list({quoted_table})").fetchall()
+        metadata[table_name] = {
+            "ddl": row["sql"] or "",
+            "columns": [
+                {
+                    "name": column["name"],
+                    "type": column["type"] or "TEXT",
+                    "notnull": bool(column["notnull"]),
+                    "pk": bool(column["pk"]),
+                }
+                for column in columns
+            ],
+            "foreign_keys": [
+                {
+                    "from": foreign_key["from"],
+                    "to_table": foreign_key["table"],
+                    "to_column": foreign_key["to"],
+                }
+                for foreign_key in foreign_keys
+            ],
+        }
+    return metadata
+def _build_table_summary(table_name: str, table_info: Dict[str, object]) -> str:
+    column_parts = []
+    for column in table_info["columns"]:
+        tags = []
+        if column["pk"]:
+            tags.append("PK")
+        if column["notnull"]:
+            tags.append("NOT NULL")
+        tag_suffix = f" [{' '.join(tags)}]" if tags else ""
+        column_parts.append(f"{column['name']} {column['type']}{tag_suffix}")
+    summary = f"Table {table_name}: {', '.join(column_parts)}"
+    if table_info["foreign_keys"]:
+        relationships = ", ".join(
+            f"{table_name}.{foreign_key['from']} -> "
+            f"{foreign_key['to_table']}.{foreign_key['to_column']}"
+            for foreign_key in table_info["foreign_keys"]
+        )
+        summary = f"{summary}\nRelationships: {relationships}"
+    return summary
+def _rank_tables(
+    metadata: Dict[str, Dict[str, object]], question: str | None, max_tables: int
+) -> List[str]:
+    table_names = list(metadata.keys())
+    if not question:
+        return table_names
+    question_tokens = _tokenize(question)
+    if not question_tokens:
+        return table_names
+    scored_tables = []
+    for table_name, table_info in metadata.items():
+        table_tokens = _tokenize(table_name)
+        column_tokens = set()
+        for column in table_info["columns"]:
+            column_tokens.update(_tokenize(column["name"]))
+        score = 0
+        score += 4 * len(question_tokens & table_tokens)
+        score += 2 * len(question_tokens & column_tokens)
+        singular_name = table_name[:-1].lower() if table_name.lower().endswith("s") else ""
+        if singular_name and singular_name in question.lower():
+            score += 2
+        if table_name.lower() in question.lower():
+            score += 3
+        scored_tables.append((score, table_name))
+    scored_tables.sort(key=lambda item: (-item[0], item[1]))
+    selected = [table_name for score, table_name in scored_tables if score > 0][:max_tables]
+    if not selected:
+        selected = [table_name for _, table_name in scored_tables[:max_tables]]
+    # Pull in directly related tables so the model sees valid join paths.
+    expanded = list(selected)
+    for table_name in selected:
+        for foreign_key in metadata[table_name]["foreign_keys"]:
+            related_table = foreign_key["to_table"]
+            if related_table in metadata and related_table not in expanded:
+                expanded.append(related_table)
+    for table_name, table_info in metadata.items():
+        for foreign_key in table_info["foreign_keys"]:
+            if foreign_key["to_table"] in selected and table_name not in expanded:
+                expanded.append(table_name)
+    return expanded[: max(max_tables, len(expanded))]
+def get_schema_context(question: str | None = None, max_tables: int = 7) -> str:
+    """Extract schema information for prompt construction.
+    When a question is provided, the returned schema is narrowed to the most
+    relevant tables plus their immediate relationships. This keeps prompts
+    smaller while preserving valid join paths.
+    """
+    connection = get_db_connection()
+    if not connection:
+        return "Unable to connect to the database to retrieve schema information."
+    try:
+        metadata = _load_schema_metadata(connection)
+    finally:
+        connection.close()
+    selected_tables = _rank_tables(metadata, question, max_tables=max_tables)
+    schema_sections = [_build_table_summary(table_name, metadata[table_name]) for table_name in selected_tables]
+    all_relationships = []
+    for table_name in selected_tables:
+        for foreign_key in metadata[table_name]["foreign_keys"]:
+            if foreign_key["to_table"] in selected_tables:
+                all_relationships.append(
+                    f"{table_name}.{foreign_key['from']} = "
+                    f"{foreign_key['to_table']}.{foreign_key['to_column']}"
+                )
+    if all_relationships:
+        schema_sections.append("Join paths:\n" + "\n".join(sorted(set(all_relationships))))
+    return "\n\n".join(schema_sections)
+# Manual check: run this module directly to verify the connection and print the table names.
 if __name__ == "__main__":
     connection = get_db_connection()
     if connection:
         print("Database connection successful!")
         cursor = connection.cursor()
         cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        print("Tables in the database:", [row[0] for row in cursor.fetchall()])
         connection.close()
     else:
         print("Failed to connect to the database.")

src/nl2sql/__pycache__/hf_engine.cpython-313.pyc ADDED Viewed

Binary file (4.4 kB). View file

src/nl2sql/hf_engine.py ADDED Viewed

	@@ -0,0 +1,98 @@

+# Hugging Face inference helpers for SQL generation.
+import os
+import re
+from dotenv import load_dotenv
+from huggingface_hub import InferenceClient
+# Runs at import time, before HF_TOKEN is read from the environment.
+load_dotenv()
+hf_token = os.getenv("HF_TOKEN")
+# Importing this module fails when HF_TOKEN is unset or empty; the client below is built from it.
+if not hf_token:
+    raise ValueError("Token Not Found!")
+client = InferenceClient(api_key=hf_token)
+# NOTE(review): the ":featherless-ai" suffix presumably selects the inference provider - confirm against the huggingface_hub docs.
+MODEL_ID = "defog/llama-3-sqlcoder-8b:featherless-ai"
+def _build_messages(question: str, schema_context: str):
+    system_content = (
+        "You are an expert SQLite assistant that converts natural language into one "
+        "executable SQLite query.\n"
+        "Rules:\n"
+        "1. Use only tables, columns, and join paths present in the provided schema.\n"
+        "2. Generate valid SQLite syntax only.\n"
+        "3. Prefer exact column names from the schema, never invent columns.\n"
+        "4. Use explicit JOIN conditions when multiple tables are required.\n"
+        "5. Use GROUP BY for aggregates by entity, HAVING for aggregate filters, "
+        "ORDER BY for ranking, and LIMIT for top-N requests.\n"
+        "6. Return SQL only. No markdown, explanations, comments, or chain-of-thought.\n"
+        "7. If a join is needed, use short aliases that remain readable.\n"
+        "8. Produce a single SELECT statement."
+    )
+    user_content = f"""Database schema:
+{schema_context}
+Question:
+{question}
+Write the SQLite query that answers the question. Return only the SQL query."""
+    return [
+        {"role": "system", "content": system_content},
+        {"role": "user", "content": user_content},
+    ]
+def _extract_sql(raw_response: str) -> str:
+    text = raw_response.strip()
+    fenced_match = re.search(r"```(?:sql)?\s*(.*?)```", text, flags=re.IGNORECASE | re.DOTALL)
+    if fenced_match:
+        text = fenced_match.group(1).strip()
+    statement_match = re.search(
+        r"(?is)\b(WITH|SELECT)\b.*?(;|$)",
+        text,
+    )
+    if statement_match:
+        text = statement_match.group(0).strip()
+    lines = [
+        line.strip()
+        for line in text.splitlines()
+        if line.strip() and not line.strip().startswith(("--", "#"))
+    ]
+    sql = " ".join(lines).strip()
+    if sql and not sql.endswith(";"):
+        sql = f"{sql};"
+    return sql
+def generate_sql(question, ddl):
+    try:
+        completion = client.chat.completions.create(
+            model=MODEL_ID,
+            messages=_build_messages(question, ddl),
+            max_tokens=220,
+            temperature=0,
+        )
+        raw_response = completion.choices[0].message.content or ""
+        sql = _extract_sql(raw_response)
+        return sql or raw_response.strip()
+    except Exception as error:
+        return f"Error: {error}"
+if __name__ == "__main__":
+    my_ddl = "CREATE TABLE tracks (id INTEGER PRIMARY KEY, title TEXT, genre TEXT);"
+    my_question = "How many tracks are there in each genre?"
+    print("Generating SQL query via Featherless AI...")
+    try:
+        result = generate_sql(my_question, my_ddl)
+        print("-" * 20)
+        print(result)
+    except Exception as error:
+        print(f"An error occurred: {error}")

src/scripts/__pycache__/evaluate_hf.cpython-313.pyc ADDED Viewed

Binary file (4.92 kB). View file

src/scripts/evaluate_hf.py ADDED Viewed

	@@ -0,0 +1,110 @@

+# Evaluation script for Hugging Face SQL generation.
+import json
+from pathlib import Path
+import pandas as pd
+from src.database.db_manager import get_db_connection, get_schema_context
+from src.nl2sql.hf_engine import generate_sql
+# Both paths are relative, so they resolve against the current working directory.
+TEST_CASES_PATH = Path("src/scripts/test_cases.json")
+RESULTS_PATH = Path("hf_evaluation_results.json")
+def _normalize_dataframe(dataframe: pd.DataFrame) -> pd.DataFrame:
+    normalized = dataframe.copy()
+    normalized.columns = [str(column).lower() for column in normalized.columns]
+    for column in normalized.columns:
+        normalized[column] = normalized[column].map(
+            lambda value: round(float(value), 6)
+            if isinstance(value, float)
+            else value
+        )
+    sort_columns = list(normalized.columns)
+    if sort_columns:
+        normalized = normalized.sort_values(by=sort_columns, kind="mergesort").reset_index(drop=True)
+    return normalized
+def compare_results(df_generated: pd.DataFrame, df_gold: pd.DataFrame) -> bool:
+    """Compare generated and expected query results."""
+    if df_generated is None or df_gold is None:
+        return False
+    try:
+        normalized_generated = _normalize_dataframe(df_generated)
+        normalized_gold = _normalize_dataframe(df_gold)
+        return normalized_generated.equals(normalized_gold)
+    except Exception as error:
+        print(f"Error comparing results: {error}")
+        return False
+def run_evaluation():
+    with TEST_CASES_PATH.open("r", encoding="utf-8") as handle:
+        test_cases = json.load(handle)
+    results = []
+    correct_count = 0
+    print(f"Running evaluation on {len(test_cases)} test cases...\n")
+    for case in test_cases:
+        question = case["question"]
+        print(f"Testing ID {case['id']}: {question[:50]}...")
+        schema_context = get_schema_context(question=question)
+        generated_sql = generate_sql(question, schema_context)
+        connection = get_db_connection()
+        if connection is None:
+            raise RuntimeError("Unable to connect to the SQLite database.")
+        try:
+            df_generated = pd.read_sql_query(generated_sql, connection)
+            df_gold = pd.read_sql_query(case["gold_sql"], connection)
+            is_correct = compare_results(df_generated, df_gold)
+            if is_correct:
+                correct_count += 1
+            results.append(
+                {
+                    "id": case["id"],
+                    "question": question,
+                    "status": "PASS" if is_correct else "FAIL",
+                    "generated_sql": generated_sql,
+                    "gold_sql": case["gold_sql"],
+                }
+            )
+        except Exception as error:
+            results.append(
+                {
+                    "id": case["id"],
+                    "question": question,
+                    "status": "ERROR",
+                    "generated_sql": generated_sql,
+                    "gold_sql": case["gold_sql"],
+                    "error": str(error),
+                }
+            )
+        finally:
+            connection.close()
+    accuracy = (correct_count / len(test_cases)) * 100 if test_cases else 0.0
+    print("\nEVALUATION COMPLETE")
+    print(f"Total Test Cases: {len(test_cases)}")
+    print(f"Correctly Generated SQL: {correct_count} / {len(test_cases)}")
+    print(f"Execution Accuracy: {accuracy:.2f}%")
+    with RESULTS_PATH.open("w", encoding="utf-8") as handle:
+        json.dump(results, handle, indent=4)
+if __name__ == "__main__":
+    run_evaluation()

src/scripts/test_cases.json CHANGED Viewed

@@ -1,77 +1,77 @@
 [
-    {
-        "id": 1,
-        "question": "How many tracks are there in each genre? List the genre name and the count.",
-        "gold_sql": "SELECT t.Genre, COUNT(t.TrackId) AS TrackCount FROM Track t GROUP BY t.Genre;"
-    },
-    {
-        "id": 2,
-        "question": "Provide a list of all albums and the name of the artist who created them.",
-        "gold_sql": "SELECT a.Title, ar.Name FROM Album a JOIN Artist ar ON a.ArtistId = ar.ArtistId;"
-    },
-    {
-        "id": 3,
-        "question": "What is the total revenue generated from each country?",
-        "gold_sql": "SELECT BillingCountry, SUM(Total) FROM Invoice GROUP BY BillingCountry;"
-    },
-    {
-        "id": 4,
-        "question": "Show the full names of all employees who are Sales Support Agents.",
-        "gold_sql": "SELECT e.FirstName, e.LastName FROM Employee e WHERE e.Title = 'Sales Support Agent';"
-    },
-    {
-        "id": 5,
-        "question": "List the top 5 customers who have spent the most money.",
-        "gold_sql": "SELECT c.CustomerId, c.FirstName, c.LastName, SUM(i.Total) as TotalSpent FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.CustomerId, c.FirstName, c.LastName ORDER BY TotalSpent DESC LIMIT 5;"
-    },
-    {
-        "id": 6,
-        "question": "List all Rock songs and the artists who performed them.",
-        "gold_sql": "SELECT t.Name, ar.Name FROM Track t JOIN Genre g ON t.GenreId = g.GenreId JOIN Album a ON t.AlbumId = a.AlbumId JOIN Artist ar ON a.ArtistId = ar.ArtistId WHERE g.Name = 'Rock';"
-    },
-    {
-        "id": 7,
-        "question": "Find the total number of tracks sold for each media type.",
-        "gold_sql": "SELECT m.Name, COUNT(il.TrackId) FROM MediaType m JOIN Track t ON m.MediaTypeId = t.MediaTypeId JOIN InvoiceLine il ON t.TrackId = il.TrackId GROUP BY m.Name;"
-    },
-    {
-        "id": 8,
-        "question": "Show the names of all tracks that appear on the 'TV Shows' playlist.",
-        "gold_sql": "SELECT t.Name FROM Track t JOIN PlaylistTrack pt ON t.TrackId = pt.TrackId JOIN Playlist p ON pt.PlaylistId = p.PlaylistId WHERE p.Name = 'TV Shows';"
-    },
-    {
-        "id": 9,
-        "question": "Which artist has the most tracks? Give the name and count.",
-        "gold_sql": "SELECT ar.Name, COUNT(t.TrackId) FROM Artist ar JOIN Album a ON ar.ArtistId = a.ArtistId JOIN Track t ON a.AlbumId = t.AlbumId GROUP BY ar.Name ORDER BY COUNT(t.TrackId) DESC LIMIT 1;"
-    },
-    {
-        "id": 10,
-        "question": "Which genres have more than 100 tracks?",
-        "gold_sql": "SELECT g.Name, COUNT(t.TrackId) as TrackCount FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.Name HAVING TrackCount > 100;"
-    },
-    {
-        "id": 11,
-        "question": "Who is the best-selling artist by total revenue? Provide the artist's name and total revenue.",
-        "gold_sql": "SELECT ar.Name, SUM(i.Total) as TotalRevenue FROM Artist ar JOIN Album a ON ar.ArtistId = a.ArtistId JOIN Track t ON a.AlbumId = t.AlbumId JOIN InvoiceLine il ON t.TrackId = il.TrackId JOIN Invoice i ON il.InvoiceId = i.InvoiceId GROUP BY ar.Name ORDER BY TotalRevenue DESC LIMIT 1;"
-    },
-    {
-        "id": 12,
-        "question": "Find the average length of tracks in seconds for each album. List the album title and average length.",
-        "gold_sql": "SELECT a.Title, AVG(t.Milliseconds) as AverageLength FROM Album a JOIN Track t ON a.AlbumId = t.AlbumId GROUP BY a.Title;"
-    },
-    {
-        "id": 13,
-        "question": "List customers helped by the employee Jane Peacock. Provide the customer's full name and the employee's full name.",
-        "gold_sql": "SELECT c.FirstName AS CustomerFirstName, c.LastName AS CustomerLastName, e.FirstName AS EmployeeFirstName, e.LastName AS EmployeeLastName FROM Customer c JOIN Employee e ON c.SupportRepId = e.EmployeeId WHERE e.FirstName = 'Jane' AND e.LastName = 'Peacock';"
-    },
-    {
-        "id": 14,
-        "question": "Which city had the highest number of invoices in 2013?",
-        "gold_sql": "SELECT BillingCity, COUNT(InvoiceId) FROM Invoice WHERE InvoiceDate LIKE '2013%' GROUP BY BillingCity ORDER BY 2 DESC LIMIT 1;"
-    },
-    {
-        "id": 15,
-        "question": "List albums with a total price greater than 20 dollars.",
-        "gold_sql": "SELECT al.Title, SUM(t.UnitPrice) FROM Album al JOIN Track t ON al.AlbumId = t.AlbumId GROUP BY al.Title HAVING SUM(t.UnitPrice) > 20;"
-    }
 ]

 [
+  {
+    "id": 1,
+    "question": "List all the artists name in the database.",
+    "gold_sql": "SELECT Name FROM Artist;"
+  },
+  {
+    "id": 2,
+    "question": "How many genres are there?",
+    "gold_sql": "SELECT COUNT(*) FROM Genre;"
+  },
+  {
+    "id": 3,
+    "question": "List the names of the first 5 tracks.",
+    "gold_sql": "SELECT Name FROM Track LIMIT 5;"
+  },
+  {
+    "id": 4,
+    "question": "Count the number of customers located in the USA.",
+    "gold_sql": "SELECT COUNT(*) FROM Customer WHERE Country = 'USA';"
+  },
+  {
+    "id": 5,
+    "question": "Find all invoices for the customer with ID 1.",
+    "gold_sql": "SELECT * FROM Invoice WHERE CustomerId = 1;"
+  },
+  {
+    "id": 6,
+    "question": "List each album title along with the artist's name.",
+    "gold_sql": "SELECT Album.Title, Artist.Name FROM Album JOIN Artist ON Album.ArtistId = Artist.ArtistId;"
+  },
+  {
+    "id": 7,
+    "question": "How many tracks belong to the 'Rock' genre?",
+    "gold_sql": "SELECT COUNT(*) FROM Track JOIN Genre ON Track.GenreId = Genre.GenreId WHERE Genre.Name = 'Rock';"
+  },
+  {
+    "id": 8,
+    "question": "Show the total revenue generated from each country.",
+    "gold_sql": "SELECT BillingCountry, SUM(Total) FROM Invoice GROUP BY BillingCountry;"
+  },
+  {
+    "id": 9,
+    "question": "Find the total number of items sold for each media type.",
+    "gold_sql": "SELECT MediaType.Name, COUNT(InvoiceLine.TrackId) FROM InvoiceLine JOIN Track ON InvoiceLine.TrackId = Track.TrackId JOIN MediaType ON Track.MediaTypeId = MediaType.MediaTypeId GROUP BY MediaType.Name;"
+  },
+  {
+    "id": 10,
+    "question": "List the first and last names of all employees who are Sales Support Agents.",
+    "gold_sql": "SELECT FirstName, LastName FROM Employee WHERE Title = 'Sales Support Agent';"
+  },
+  {
+    "id": 11,
+    "question": "List the top 5 customers who have spent the most money in total.",
+    "gold_sql": "SELECT c.FirstName, c.LastName, SUM(i.Total) as TotalSpent FROM Customer c JOIN Invoice i ON c.CustomerId = i.CustomerId GROUP BY c.CustomerId ORDER BY TotalSpent DESC LIMIT 5;"
+  },
+  {
+    "id": 12,
+    "question": "Which artist has the most tracks in the database? Give the name and count.",
+    "gold_sql": "SELECT ar.Name, COUNT(t.TrackId) as TrackCount FROM Artist ar JOIN Album al ON ar.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId GROUP BY ar.ArtistId ORDER BY TrackCount DESC LIMIT 1;"
+  },
+  {
+    "id": 13,
+    "question": "Which genres have more than 100 tracks? List the genre name and count.",
+    "gold_sql": "SELECT g.Name, COUNT(t.TrackId) as TrackCount FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.GenreId HAVING TrackCount > 100;"
+  },
+  {
+    "id": 14,
+    "question": "Calculate the average track length in seconds for each genre.",
+    "gold_sql": "SELECT g.Name, AVG(t.Milliseconds) / 1000.0 as AvgSeconds FROM Genre g JOIN Track t ON g.GenreId = t.GenreId GROUP BY g.GenreId;"
+  },
+  {
+    "id": 15,
+    "question": "Identify the artist who has earned the most revenue from customers in Canada.",
+    "gold_sql": "SELECT ar.Name, SUM(il.UnitPrice * il.Quantity) AS Revenue FROM Artist ar JOIN Album al ON ar.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId JOIN InvoiceLine il ON t.TrackId = il.TrackId JOIN Invoice i ON il.InvoiceId = i.InvoiceId WHERE i.BillingCountry = 'Canada' GROUP BY ar.ArtistId ORDER BY Revenue DESC LIMIT 1;"
+  }
 ]