File size: 9,845 Bytes

e020674

'''
A collection of prompts for the text2sql operator.
'''
class TextSQLConsistencyPrompt:
    """Builds the prompt asking a model whether a SQL query matches its question."""

    def __init__(self):
        # Stateless: the class only groups the prompt-building method.
        pass

    def text_sql_consistency_prompt(self, question, sql):
        """Return the question/SQL consistency-verification prompt.

        Args:
            question: Natural language question, inserted after ``Question:``.
            sql: SQL query to verify, inserted after ``SQL:``.

        Returns:
            A multi-line string that starts with a newline, prefixes every
            following line (blank ones included) with 12 spaces, and ends
            with a bare 12-space indent. It instructs the model to reply
            with an ``Analysis:`` line and a ``Conclusion:`` line.
        """
        pad = " " * 12
        body = (
            "## SQL Consistency Verification Task",
            "",
            "**Objective**: Given the question and SQL query, determine if the SQL query correctly implements the requirements specified in the natural language Question.",
            "",
            "**Evaluation Criteria**:",
            "1. The SQL should reflect key elements from the Question:",
            "2. You can refer to the content in evidence to determine if the SQL meets the requirements of the question",
            "3. Since you are not given the database schema, you can only analyze the SQL query and its relation to the Question and evidence.",
            "4. Do not judge as inconsistent just because of the database schema",
            "",
            "**Input**:",
            f"Question: {question}",
            f"SQL: {sql}",
            "",
            "**Required Output Format**:",
            "Analysis: <Brief technical analysis of the alignment between Question and SQL>",
            'Conclusion: <"YES" if consistent or uncertain, "NO" if definitely inconsistent> (No other text)',
            "",
            "**Example**:",
            "Analysis: The SQL query correctly implements the requirements of the Question, (may be more).",
            "Conclusion: <YES>",
            "",
            "**Important Notes**:",
            "- Respond ONLY with the specified format above",
            '- "YES" should be used when SQL implements Question OR when you\'re uncertain',
            '- "NO" should be used when SQL contradicts the Question',
            "- Be strict with logical requirements but lenient with syntax variations",
        )
        # Every row, including the empty ones, carries the indent; the text
        # then closes with one more indent and no final newline.
        rendered = "".join(f"{pad}{row}\n" for row in body)
        return "\n" + rendered + pad
    
class QuestionRefinePrompt:
    """Builds the prompt asking a model whether a question needs rewording."""

    def __init__(self):
        # Stateless: nothing to initialise.
        pass

    def question_refine_prompt(self, question):
        """Return the question-clarification prompt.

        Args:
            question: The original question, inserted after
                ``ORIGINAL QUESTION:``.

        Returns:
            A newline-separated string (no trailing newline) telling the
            model to answer inside a fenced block with an ``ANALYSIS:`` line
            and a ``RESULT:`` line that is either ``'NO'`` or the rewrite.
        """
        rows = [
            "Analyze the following question and determine if it needs clarification:",
            f"ORIGINAL QUESTION: {question}",
            "Instructions:",
            "1. If the question is already perfectly clear, output: 'NO'",
            "2. If clarification would help, rewrite it to be more precise while:",
            "   - Preserving all original meaning",
            "   - Not adding/removing any factual content",
            "   - Only improving clarity of expression",
            "",  # blank line separating the instructions from the format spec
            "Format your response exactly as:",
            "```",
            "ANALYSIS: <brief explanation of why rewrite is/isn't needed>",
            "RESULT: <either 'NO' or the rewritten question>",
            "```",
        ]
        return "\n".join(rows)
    
class ExtraKnowledgePrompt:
    """Builds the prompt asking whether a question needs knowledge beyond SQL and schema."""

    def __init__(self):
        # Stateless: nothing to initialise.
        pass

    def extra_knowledge_prompt(self, question, sql, schema):
        """Return the extra-knowledge analysis prompt.

        Args:
            question: Database question, inserted after ``QUESTION:``.
            sql: SQL query, inserted after ``SQL QUERY:``.
            schema: Table schema text, placed on the line after
                ``TABLE SCHEMA:``.

        Returns:
            A newline-separated string (no trailing newline) instructing the
            model to reply with ``RESULT: <knowledge>`` or ``RESULT: NO``.
        """
        sections = [
            "Analyze whether answering this database question requires additional knowledge beyond the provided SQL and schema.",
            f"QUESTION: {question}",
            f"SQL QUERY: {sql}",
            "TABLE SCHEMA:",
            f"{schema}",
            "",
            "Consider:",
            "1. Are there domain terms not explained in the schema?",
            "2. Does the query rely on implicit business rules?",
            "3. Is special knowledge needed to interpret results?",
            "",
            "Respond ONLY in this exact format:",
            "RESULT: <knowledge> OR RESULT: NO",
            "Where <knowledge> is a concise explanation of required additional knowledge.",
            "If no extra knowledge is needed, respond with exactly 'RESULT: NO'.",
        ]
        return "\n".join(sections)


class FinalPromptGeneration:
    """Builds the final SQL-generation prompts from a question and a schema.

    Two prompt layouts are offered (``dial_sql_*`` and ``omni_sql_*``), each
    in a ``cot`` and a ``non_cot`` variant. Every method returns a plain
    ``str``.
    """

    def __init__(self):
        # Stateless: nothing to initialise.
        pass

    def dial_sql_cot_prompt(self, question, schema):
        """Return the comment-style prompt that ends with a step-by-step cue.

        Args:
            question: Natural language question to answer.
            schema: Database schema text, emitted verbatim.

        Returns:
            The prompt as a single string ending with
            ``"Let's think step by step "``.
        """
        # No trailing comma inside the parentheses: a comma here would turn
        # the concatenated string into a one-element tuple.
        return (
            "/* Given the following database schema: */\n"
            f"{schema}\n\n"
            f"/* Answer the following: {question} */\n"
            "Let's think step by step "
        )

    def dial_sql_non_cot_prompt(self, question, schema):
        """Return the comment-style prompt that ends with a ``SELECT `` prefix.

        Args:
            question: Natural language question to answer.
            schema: Database schema text, emitted verbatim.

        Returns:
            The prompt as a single string ending with ``"SELECT "``.
        """
        # No trailing comma inside the parentheses (see dial_sql_cot_prompt).
        return (
            "/* Given the following database schema: */\n"
            f"{schema}\n\n"
            f"/* Answer the following: {question} */\n"
            "SELECT "
        )

    def _omni_sql_prompt(self, question, schema, include_instructions):
        """Assemble the shared ``omni_sql_*`` prompt text.

        The two public variants differ only in whether the ``Instructions:``
        section sits between the question and the output-format section.
        """
        instructions = ""
        if include_instructions:
            instructions = (
                "Instructions:\n"
                "- Make sure you only output the information that is asked in the question. If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n"
                "- The generated query should return all of the information asked in the question without any missing or extra information.\n"
                "- Before generating the final SQL query, please think through the steps of how to write the query.\n\n"
            )
        return (
            "Task Overview:\n"
            "You are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.\n\n"
            "Database Engine:\n"
            "SQLite\n\n"
            "Database Schema:\n"
            f"{schema}\n"
            "This schema describes the database's structure, including tables, columns, primary keys, foreign keys, and any relevant relationships or constraints.\n\n"
            "Question:\n"
            f"{question}\n\n"
            f"{instructions}"
            "Output Format:\n"
            "In your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```\n\n"
            "Take a deep breath and think step by step to find the correct SQL query.\n"
        )

    def omni_sql_cot_prompt(self, question, schema):
        """Return the sectioned SQLite prompt including the ``Instructions:`` block.

        Args:
            question: Natural language question to answer.
            schema: Database schema text, emitted verbatim.

        Returns:
            The prompt as a single string ending with a newline.
        """
        return self._omni_sql_prompt(question, schema, include_instructions=True)

    def omni_sql_non_cot_prompt(self, question, schema):
        """Return the sectioned SQLite prompt without the ``Instructions:`` block.

        Args:
            question: Natural language question to answer.
            schema: Database schema text, emitted verbatim.

        Returns:
            The prompt as a single string ending with a newline.
        """
        # This variant omits the "Instructions:" section that the CoT variant
        # includes; note the closing line still asks to think step by step.
        return self._omni_sql_prompt(question, schema, include_instructions=False)

class Text2SQLCotPrompt:
    """Builds prompts that ask a model for a step-by-step text-to-SQL reasoning trace."""

    def __init__(self):
        # Stateless: nothing to initialise.
        pass

    def text2sql_cot_prompt(self, schema, question, sql):
        """Return the reasoning-trace prompt that treats ``sql`` as a reference solution.

        Args:
            schema: Database schema text, placed under ``[Database Schema]:``.
            question: Question text, placed under
                ``[Natural Language Question]:``.
            sql: Reference SQL, placed in a fenced block under
                ``[Reference Solution]:``.

        Returns:
            A multi-line string that starts with a newline, indents every
            non-blank line by 12 spaces, leaves blank lines empty, and ends
            with a newline followed by 8 spaces.
        """
        pad = " " * 12
        gap = None  # marks a completely empty line (no indent)
        rows = [
            "You are a senior data analyst specializing in SQL. Your task is to translate a natural language question into an executable SQLite query, providing a detailed reasoning trace.",
            gap,
            "You will also receive a reference solution from a colleague, which may or may not be correct. This extra information intends to help you generate your answer, but you are asked not to mention the reference solution in any form.",
            # The next three rows deliberately keep their trailing space.
            "The reference solution might include: ",
            "1. Unnecessary table and column selections. ",
            "2. Incorrect or excessive joins. ",
            "3. Misalignment with the question.",
            "4. Opportunities for simplification.",
            gap,
            "Ensure the SQL query is presented in a Markdown code block with proper syntax highlighting, like this:",
            "```sql",
            "SELECT * FROM table;",
            "```",
            gap,
            "[Database Schema]:",
            f"{schema}",
            gap,
            "[Natural Language Question]:",
            f"{question}",
            gap,
            "[Reference Solution]:",
            "```sql",
            f"{sql}",
            "```",
            gap,
            "Provide your step-by-step text-to-SQL solution here.",
        ]
        rendered = "\n".join("" if row is gap else f"{pad}{row}" for row in rows)
        closing_indent = " " * 8
        return f"\n{rendered}\n{closing_indent}"

    def text2sql_cot_prompt_backup(self, schema, question, sql):
        """Return the alternative prompt asking to explain how ``sql`` answers the question.

        Args:
            schema: Database schema text, placed under ``[Database Schema]:``.
            question: Question text, placed under
                ``[Natural Language Question]:``.
            sql: SQL statement, placed in a fenced block under ``[SQL]:``.

        Returns:
            A multi-line string with no leading or trailing newline; every
            line after the first is indented by 8 spaces.
        """
        rows = [
            "You are a senior data analyst who specializes in solving complex data query problems using SQL. Your task is to **reason step-by-step from a natural language question to its corresponding SQL query**, based on the provided database schema, question, and SQL statement. What I need is the reasoning process.",
            "Please present your thought process clearly and systematically. This should include (but not be limited to) the following aspects:",
            "1. What are the key pieces of information mentioned in the question?",
            "2. From which tables should the data be retrieved?",
            "3. Which fields or columns are involved?",
            "4. Are there operations such as aggregation, filtering, or sorting required?",
            "5. Why was the SQL written this way? Explain the logic behind each step.",
            "Your final output should be about how you arrived at the SQL query from the original question.",
            "[Database Schema]:",
            f"{schema}",
            "[Natural Language Question]:",
            f"{question}",
            "[SQL]:",
            "```sql",
            f"{sql}",
            "```",
            "Please provide your step-by-step analysis. Begin with let's think step by step.",
        ]
        # Only continuation lines are indented, so the indent rides on the
        # separator rather than on each row.
        separator = "\n" + " " * 8
        return separator.join(rows)