File size: 5,323 Bytes
1f25563
 
202b6de
1f25563
 
 
3902d48
446f0d8
1f25563
446f0d8
1f25563
 
9b2d1b9
 
 
 
 
 
18824a6
9b2d1b9
1f25563
 
9b2d1b9
 
1f25563
9b2d1b9
18824a6
1f25563
 
 
 
 
 
 
 
 
42749e6
1f25563
 
f63709a
1f25563
 
 
 
 
aa4a308
446f0d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c4d448
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446f0d8
bf9eb1e
446f0d8
1f25563
 
202b6de
1f25563
 
202b6de
 
446f0d8
 
 
 
 
 
 
 
 
 
 
0c4d448
 
 
 
 
 
 
 
bf9eb1e
446f0d8
 
bf9eb1e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from dotenv import load_dotenv
import os
import gradio as gr
from groq import Groq

load_dotenv()
api = os.getenv("GROQ_API_KEY")
client = Groq(api_key=api)

### --- TAB 1: SQL Generator --- ###
def create_prompt(user_query, table_metadata):
    system_prompt = """
You are a SQL query generator for a single relational table.
You must strictly follow the metadata and never guess or invent column names.
Instructions:
- Use only the table and columns listed in the metadata.
- Never generate queries with columns not present in the metadata.
- If a column like 'gender' is not present, do not mention it.
- Do not hallucinate values or table names. Use provided structure only.
- Output valid SQL (DuckDB-compatible), single line, no comments or explanations.
Input:
User Query: {user_query}
Table Metadata:
{table_metadata}
Output:
A single valid SQL SELECT statement using only metadata-provided columns.
    """
    return system_prompt.strip(), f"User Query: {user_query}\nTable Metadata: {table_metadata}"

def generate_output(system_prompt, user_prompt):
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        model="llama3-70b-8192"
    )
    response = chat_completion.choices[0].message.content.strip()
    return response if response.lower().startswith("select") else "Can't perform the task at the moment."

def response(payload):
    user_query = payload.get("question", "")
    table_metadata = payload.get("schema", "")
    system_prompt, user_prompt = create_prompt(user_query, table_metadata)
    return generate_output(system_prompt, user_prompt)


### --- TAB 2: SQL Output Explanation --- ###
def explain_output_prompt(sql_query, query_result):
    system_prompt = """
You are an assistant that explains the meaning of SQL query results in plain language.
You should take into account the SQL query used and the resulting output.
Avoid assumptions. Focus on summarizing what the data reveals.
"""
    user_prompt = f"""
SQL Query:
{sql_query}
Query Result:
{query_result}
Explanation:
"""
    return system_prompt.strip(), user_prompt.strip()

def explain_sql_output(sql_query, query_result):
    system_prompt, user_prompt = explain_output_prompt(sql_query, query_result)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        model="llama3-70b-8192"
    )
    return chat_completion.choices[0].message.content.strip()

### --- TAB 3: Data Summary & Insights --- ###

def summarize_data(data_file):
    try:
        df = pd.read_csv(data_file.name)
    except Exception:
        try:
            df = pd.read_json(data_file.name)
        except Exception as e:
            return f"Failed to read file: {str(e)}"

    metadata = "\n".join([f"- {col}: {str(dtype)}" for col, dtype in df.dtypes.items()])
    preview = df.head(30).to_csv(index=False)

    system_prompt = """
You are a professional data analyst AI that specializes in summarizing datasets and uncovering insights.

Your task is to:
1. Provide a high-level summary of the dataset.
2. Identify 5 insightful observations or trends.
3. Highlight any interesting patterns, anomalies, or correlations you find.
4. Use simple, clear language understandable to non-technical users.
5. If relevant, suggest what kind of decisions or actions could be made based on the data.

Only use the provided preview and schema — do not assume missing data or guess columns.
Be helpful, concise, and specific.
"""

    user_prompt = f"""
Here is a preview of the dataset (first 30 rows):

{preview}

Here is the schema (column name: type):

{metadata}

Please generate:
- A brief overview of the dataset.
- Five unique, data-driven insights or summaries.
- Optional: Patterns or anomalies worth noting.
- Keep it understandable and actionable.
"""

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()}
        ],
        model="llama3-70b-8192"
    )

    return chat_completion.choices[0].message.content.strip()

### --- Gradio Interface --- ###
tab1 = gr.Interface(
    fn=response,
    inputs=gr.JSON(label="Input JSON (question, schema)"),
    outputs="text",
    title="SQL Generator (Groq + LLaMA3)",
    description="Input: question & table metadata. Output: SQL using dynamic schema."
)

tab2 = gr.Interface(
    fn=explain_sql_output,
    inputs=[
        gr.Textbox(label="SQL Query"),
        gr.Textbox(label="SQL Output (Raw JSON or Table Result)")
    ],
    outputs="text",
    title="Explain SQL Result (Groq + LLaMA3)",
    description="Input a SQL query and its result. Get an AI-generated explanation."
)

tab3 = gr.Interface(
    fn=summarize_data,
    inputs=gr.File(label="Upload CSV or JSON Dataset"),
    outputs="text",
    title="Data Summary & Insights (Groq + LLaMA3)",
    description="Upload a dataset to get a general summary and 5 AI-generated insights."
)

demo = gr.TabbedInterface([tab1, tab2], ["SQL Generator", "Explain Output"])

if __name__ == '__main__':
    demo.launch()