Spaces:
Sleeping
Sleeping
File size: 5,323 Bytes
1f25563 202b6de 1f25563 3902d48 446f0d8 1f25563 446f0d8 1f25563 9b2d1b9 18824a6 9b2d1b9 1f25563 9b2d1b9 1f25563 9b2d1b9 18824a6 1f25563 42749e6 1f25563 f63709a 1f25563 aa4a308 446f0d8 0c4d448 446f0d8 bf9eb1e 446f0d8 1f25563 202b6de 1f25563 202b6de 446f0d8 0c4d448 bf9eb1e 446f0d8 bf9eb1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import os

import gradio as gr
import pandas as pd
from dotenv import load_dotenv
from groq import Groq
# Load variables from a local .env file into the process environment (no-op if absent).
load_dotenv()
# NOTE(review): os.getenv returns None when GROQ_API_KEY is unset; Groq(api_key=None)
# would then fail on the first request — confirm the deployment always provides the key.
api = os.getenv("GROQ_API_KEY")
# Shared Groq client used by every tab handler below.
client = Groq(api_key=api)
### --- TAB 1: SQL Generator --- ###
def create_prompt(user_query, table_metadata):
    """Build the (system, user) message pair for the SQL-generation model.

    The system message is a fixed instruction template; the literal
    ``{user_query}``/``{table_metadata}`` markers in it are shown to the
    model as a format illustration and are NOT substituted — the actual
    values travel in the user message.

    Args:
        user_query: Natural-language question to translate into SQL.
        table_metadata: Description of the table and its columns.

    Returns:
        Tuple ``(system_prompt, user_prompt)`` of prompt strings.
    """
    instruction_template = """
You are a SQL query generator for a single relational table.
You must strictly follow the metadata and never guess or invent column names.
Instructions:
- Use only the table and columns listed in the metadata.
- Never generate queries with columns not present in the metadata.
- If a column like 'gender' is not present, do not mention it.
- Do not hallucinate values or table names. Use provided structure only.
- Output valid SQL (DuckDB-compatible), single line, no comments or explanations.
Input:
User Query: {user_query}
Table Metadata:
{table_metadata}
Output:
A single valid SQL SELECT statement using only metadata-provided columns.
"""
    user_message = "User Query: {}\nTable Metadata: {}".format(user_query, table_metadata)
    return instruction_template.strip(), user_message
def generate_output(system_prompt, user_prompt):
    """Send the prompt pair to Groq's LLaMA3 model and return the generated SQL.

    Args:
        system_prompt: Instruction template for the model.
        user_prompt: The question plus table metadata.

    Returns:
        The model's reply if it looks like a SELECT statement, otherwise a
        fixed refusal string (guards against non-SQL chatter from the model).
    """
    completion = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    sql_text = completion.choices[0].message.content.strip()
    # Only SELECT statements are accepted; anything else is treated as a failure.
    if sql_text.lower().startswith("select"):
        return sql_text
    return "Can't perform the task at the moment."
def response(payload):
    """Gradio handler for Tab 1: turn a JSON payload into a SQL statement.

    Args:
        payload: Dict with optional ``question`` and ``schema`` keys.

    Returns:
        Generated SQL string (or the model's refusal message).
    """
    question = payload.get("question", "")
    schema = payload.get("schema", "")
    prompts = create_prompt(question, schema)
    return generate_output(*prompts)
### --- TAB 2: SQL Output Explanation --- ###
def explain_output_prompt(sql_query, query_result):
    """Build the (system, user) message pair for explaining a SQL result.

    Args:
        sql_query: The SQL statement that was executed.
        query_result: Raw result of that query (table text or JSON).

    Returns:
        Tuple ``(system_prompt, user_prompt)`` of stripped prompt strings.
    """
    instructions = (
        "You are an assistant that explains the meaning of SQL query results in plain language.\n"
        "You should take into account the SQL query used and the resulting output.\n"
        "Avoid assumptions. Focus on summarizing what the data reveals."
    )
    details = "SQL Query:\n{}\nQuery Result:\n{}\nExplanation:".format(sql_query, query_result)
    return instructions, details
def explain_sql_output(sql_query, query_result):
    """Gradio handler for Tab 2: ask the model to explain a SQL result.

    Args:
        sql_query: The SQL statement that was executed.
        query_result: Raw result of that query.

    Returns:
        Plain-language explanation produced by the model.
    """
    sys_msg, usr_msg = explain_output_prompt(sql_query, query_result)
    reply = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": sys_msg},
            {"role": "user", "content": usr_msg},
        ],
    )
    return reply.choices[0].message.content.strip()
### --- TAB 3: Data Summary & Insights --- ###
def _load_dataframe(path):
    """Read *path* as CSV, falling back to JSON; raises the JSON error if both fail."""
    try:
        return pd.read_csv(path)
    except Exception:
        # Not parseable as CSV — try JSON before giving up.
        return pd.read_json(path)

def summarize_data(data_file):
    """Gradio handler for Tab 3: summarize an uploaded CSV/JSON dataset.

    Sends a 30-row preview plus the column schema to the model and asks for
    an overview, five insights, and notable patterns.

    NOTE: this function relies on ``pandas`` being imported as ``pd`` at
    module level — the original file used ``pd`` without importing it,
    which raised NameError on every upload.

    Args:
        data_file: Uploaded file object from ``gr.File`` (read via ``.name``).

    Returns:
        The model-generated summary, or an error string if the file is unreadable.
    """
    try:
        df = _load_dataframe(data_file.name)
    except Exception as e:
        return f"Failed to read file: {str(e)}"
    # Schema + small preview keeps the prompt size bounded regardless of dataset size.
    metadata = "\n".join(f"- {col}: {dtype}" for col, dtype in df.dtypes.items())
    preview = df.head(30).to_csv(index=False)
    system_prompt = """
You are a professional data analyst AI that specializes in summarizing datasets and uncovering insights.
Your task is to:
1. Provide a high-level summary of the dataset.
2. Identify 5 insightful observations or trends.
3. Highlight any interesting patterns, anomalies, or correlations you find.
4. Use simple, clear language understandable to non-technical users.
5. If relevant, suggest what kind of decisions or actions could be made based on the data.
Only use the provided preview and schema — do not assume missing data or guess columns.
Be helpful, concise, and specific.
"""
    user_prompt = f"""
Here is a preview of the dataset (first 30 rows):
{preview}
Here is the schema (column name: type):
{metadata}
Please generate:
- A brief overview of the dataset.
- Five unique, data-driven insights or summaries.
- Optional: Patterns or anomalies worth noting.
- Keep it understandable and actionable.
"""
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
        model="llama3-70b-8192",
    )
    return chat_completion.choices[0].message.content.strip()
### --- Gradio Interface --- ###
# Tab 1: JSON in (question + schema) -> SQL string out, via response().
tab1 = gr.Interface(
fn=response,
inputs=gr.JSON(label="Input JSON (question, schema)"),
outputs="text",
title="SQL Generator (Groq + LLaMA3)",
description="Input: question & table metadata. Output: SQL using dynamic schema."
)
# Tab 2: SQL query + raw result in -> plain-language explanation out.
tab2 = gr.Interface(
fn=explain_sql_output,
inputs=[
gr.Textbox(label="SQL Query"),
gr.Textbox(label="SQL Output (Raw JSON or Table Result)")
],
outputs="text",
title="Explain SQL Result (Groq + LLaMA3)",
description="Input a SQL query and its result. Get an AI-generated explanation."
)
# Tab 3: uploaded CSV/JSON file -> AI-generated dataset summary.
# NOTE(review): tab3 is defined here but the TabbedInterface below only mounts
# tab1 and tab2 — confirm whether this tab was meant to be exposed.
tab3 = gr.Interface(
fn=summarize_data,
inputs=gr.File(label="Upload CSV or JSON Dataset"),
outputs="text",
title="Data Summary & Insights (Groq + LLaMA3)",
description="Upload a dataset to get a general summary and 5 AI-generated insights."
)
# Mount all three tools. BUG FIX: tab3 was fully defined above but never added
# to the TabbedInterface, so the data-summary tool was unreachable in the UI.
demo = gr.TabbedInterface(
    [tab1, tab2, tab3],
    ["SQL Generator", "Explain Output", "Data Summary & Insights"],
)

if __name__ == '__main__':
    demo.launch()