Spaces:

adamboom111
/

text2sql

Sleeping

File size: 5,323 Bytes

from dotenv import load_dotenv
import os
import gradio as gr
from groq import Groq

load_dotenv()
api = os.getenv("GROQ_API_KEY")
client = Groq(api_key=api)

### --- TAB 1: SQL Generator --- ###
def create_prompt(user_query, table_metadata):
    system_prompt = """
You are a SQL query generator for a single relational table.
You must strictly follow the metadata and never guess or invent column names.
Instructions:
- Use only the table and columns listed in the metadata.
- Never generate queries with columns not present in the metadata.
- If a column like 'gender' is not present, do not mention it.
- Do not hallucinate values or table names. Use provided structure only.
- Output valid SQL (DuckDB-compatible), single line, no comments or explanations.
Input:
User Query: {user_query}
Table Metadata:
{table_metadata}
Output:
A single valid SQL SELECT statement using only metadata-provided columns.
    """
    return system_prompt.strip(), f"User Query: {user_query}\nTable Metadata: {table_metadata}"

def generate_output(system_prompt, user_prompt):
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        model="llama3-70b-8192"
    )
    response = chat_completion.choices[0].message.content.strip()
    return response if response.lower().startswith("select") else "Can't perform the task at the moment."

def response(payload):
    user_query = payload.get("question", "")
    table_metadata = payload.get("schema", "")
    system_prompt, user_prompt = create_prompt(user_query, table_metadata)
    return generate_output(system_prompt, user_prompt)


### --- TAB 2: SQL Output Explanation --- ###
def explain_output_prompt(sql_query, query_result):
    system_prompt = """
You are an assistant that explains the meaning of SQL query results in plain language.
You should take into account the SQL query used and the resulting output.
Avoid assumptions. Focus on summarizing what the data reveals.
"""
    user_prompt = f"""
SQL Query:
{sql_query}
Query Result:
{query_result}
Explanation:
"""
    return system_prompt.strip(), user_prompt.strip()

def explain_sql_output(sql_query, query_result):
    system_prompt, user_prompt = explain_output_prompt(sql_query, query_result)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        model="llama3-70b-8192"
    )
    return chat_completion.choices[0].message.content.strip()

### --- TAB 3: Data Summary & Insights --- ###

def summarize_data(data_file):
    try:
        df = pd.read_csv(data_file.name)
    except Exception:
        try:
            df = pd.read_json(data_file.name)
        except Exception as e:
            return f"Failed to read file: {str(e)}"

    metadata = "\n".join([f"- {col}: {str(dtype)}" for col, dtype in df.dtypes.items()])
    preview = df.head(30).to_csv(index=False)

    system_prompt = """
You are a professional data analyst AI that specializes in summarizing datasets and uncovering insights.

Your task is to:
1. Provide a high-level summary of the dataset.
2. Identify 5 insightful observations or trends.
3. Highlight any interesting patterns, anomalies, or correlations you find.
4. Use simple, clear language understandable to non-technical users.
5. If relevant, suggest what kind of decisions or actions could be made based on the data.

Only use the provided preview and schema — do not assume missing data or guess columns.
Be helpful, concise, and specific.
"""

    user_prompt = f"""
Here is a preview of the dataset (first 30 rows):

{preview}

Here is the schema (column name: type):

{metadata}

Please generate:
- A brief overview of the dataset.
- Five unique, data-driven insights or summaries.
- Optional: Patterns or anomalies worth noting.
- Keep it understandable and actionable.
"""

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()}
        ],
        model="llama3-70b-8192"
    )

    return chat_completion.choices[0].message.content.strip()

### --- Gradio Interface --- ###
tab1 = gr.Interface(
    fn=response,
    inputs=gr.JSON(label="Input JSON (question, schema)"),
    outputs="text",
    title="SQL Generator (Groq + LLaMA3)",
    description="Input: question & table metadata. Output: SQL using dynamic schema."
)

tab2 = gr.Interface(
    fn=explain_sql_output,
    inputs=[
        gr.Textbox(label="SQL Query"),
        gr.Textbox(label="SQL Output (Raw JSON or Table Result)")
    ],
    outputs="text",
    title="Explain SQL Result (Groq + LLaMA3)",
    description="Input a SQL query and its result. Get an AI-generated explanation."
)

tab3 = gr.Interface(
    fn=summarize_data,
    inputs=gr.File(label="Upload CSV or JSON Dataset"),
    outputs="text",
    title="Data Summary & Insights (Groq + LLaMA3)",
    description="Upload a dataset to get a general summary and 5 AI-generated insights."
)

demo = gr.TabbedInterface([tab1, tab2], ["SQL Generator", "Explain Output"])

if __name__ == '__main__':
    demo.launch()