Spaces:
Sleeping
Sleeping
import os

import gradio as gr
import pandas as pd
from dotenv import load_dotenv
from groq import Groq
# Pull environment variables from a local .env file so the Groq key does
# not have to be exported by hand.
load_dotenv()

# One shared Groq client, reused by all three tabs.
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)
| ### --- TAB 1: SQL Generator --- ### | |
def create_prompt(user_query, table_metadata):
    """Build the (system, user) prompt pair for the SQL-generation call.

    Fix: the previous system prompt embedded literal ``{user_query}`` /
    ``{table_metadata}`` placeholders inside a plain (non-f) string, so the
    unfilled template was sent verbatim to the model while the real values
    already travelled in the user message. The broken template section is
    removed; the values still go in the user prompt.

    Args:
        user_query: Natural-language question to translate into SQL.
        table_metadata: Description of the single table's columns/types.

    Returns:
        Tuple ``(system_prompt, user_prompt)`` ready for the chat API.
    """
    system_prompt = """
You are a SQL query generator for a single relational table.
You must strictly follow the metadata and never guess or invent column names.
Instructions:
- Use only the table and columns listed in the metadata.
- Never generate queries with columns not present in the metadata.
- If a column like 'gender' is not present, do not mention it.
- Do not hallucinate values or table names. Use provided structure only.
- Output valid SQL (DuckDB-compatible), single line, no comments or explanations.
Output:
A single valid SQL SELECT statement using only metadata-provided columns.
"""
    user_prompt = f"User Query: {user_query}\nTable Metadata: {table_metadata}"
    return system_prompt.strip(), user_prompt
def generate_output(system_prompt, user_prompt):
    """Send the prompt pair to Groq's LLaMA3 model and return a SQL string.

    Any reply that does not look like a SELECT statement is replaced by a
    fixed refusal message so downstream consumers never receive free text.
    """
    completion = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    sql_text = completion.choices[0].message.content.strip()
    if sql_text.lower().startswith("select"):
        return sql_text
    return "Can't perform the task at the moment."
def response(payload):
    """Gradio entry point for tab 1: JSON payload -> generated SQL.

    Expects ``payload`` to carry ``question`` and ``schema`` keys; a
    missing key falls back to an empty string.
    """
    question = payload.get("question", "")
    schema = payload.get("schema", "")
    prompts = create_prompt(question, schema)
    return generate_output(*prompts)
| ### --- TAB 2: SQL Output Explanation --- ### | |
def explain_output_prompt(sql_query, query_result):
    """Build the (system, user) prompt pair for explaining a SQL result."""
    system_prompt = (
        "You are an assistant that explains the meaning of SQL query results in plain language.\n"
        "You should take into account the SQL query used and the resulting output.\n"
        "Avoid assumptions. Focus on summarizing what the data reveals."
    )
    user_prompt = (
        f"SQL Query:\n{sql_query}\n"
        f"Query Result:\n{query_result}\n"
        "Explanation:"
    )
    return system_prompt, user_prompt
def explain_sql_output(sql_query, query_result):
    """Ask the model for a plain-language explanation of a query result."""
    system_msg, user_msg = explain_output_prompt(sql_query, query_result)
    completion = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
    )
    return completion.choices[0].message.content.strip()
| ### --- TAB 3: Data Summary & Insights --- ### | |
def _load_dataframe(data_file):
    """Read an uploaded file as CSV first, then JSON.

    Returns ``(df, None)`` on success or ``(None, error_message)`` when
    neither parser can read the file.
    """
    try:
        return pd.read_csv(data_file.name), None
    except Exception:
        pass
    try:
        return pd.read_json(data_file.name), None
    except Exception as e:
        return None, f"Failed to read file: {str(e)}"


def summarize_data(data_file):
    """Gradio entry point for tab 3: uploaded dataset -> AI summary.

    Args:
        data_file: Gradio file wrapper exposing a ``.name`` path
            (may be ``None`` when nothing was uploaded).

    Returns:
        Model-generated summary text, or an error message when the file
        is missing or cannot be parsed as CSV/JSON.
    """
    if data_file is None:
        return "Please upload a CSV or JSON file."
    df, error = _load_dataframe(data_file)
    if error is not None:
        return error
    # Schema line per column plus a small preview keeps the prompt bounded
    # regardless of dataset size.
    metadata = "\n".join(f"- {col}: {str(dtype)}" for col, dtype in df.dtypes.items())
    preview = df.head(30).to_csv(index=False)
    system_prompt = """
You are a professional data analyst AI that specializes in summarizing datasets and uncovering insights.
Your task is to:
1. Provide a high-level summary of the dataset.
2. Identify 5 insightful observations or trends.
3. Highlight any interesting patterns, anomalies, or correlations you find.
4. Use simple, clear language understandable to non-technical users.
5. If relevant, suggest what kind of decisions or actions could be made based on the data.
Only use the provided preview and schema — do not assume missing data or guess columns.
Be helpful, concise, and specific.
"""
    user_prompt = f"""
Here is a preview of the dataset (first 30 rows):
{preview}
Here is the schema (column name: type):
{metadata}
Please generate:
- A brief overview of the dataset.
- Five unique, data-driven insights or summaries.
- Optional: Patterns or anomalies worth noting.
- Keep it understandable and actionable.
"""
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
        model="llama3-70b-8192",
    )
    return chat_completion.choices[0].message.content.strip()
| ### --- Gradio Interface --- ### | |
# Tab 1: natural-language question + table schema (JSON) -> generated SQL.
tab1 = gr.Interface(
    fn=response,
    inputs=gr.JSON(label="Input JSON (question, schema)"),
    outputs="text",
    title="SQL Generator (Groq + LLaMA3)",
    description="Input: question & table metadata. Output: SQL using dynamic schema."
)
# Tab 2: SQL query + raw result -> plain-language explanation.
tab2 = gr.Interface(
    fn=explain_sql_output,
    inputs=[
        gr.Textbox(label="SQL Query"),
        gr.Textbox(label="SQL Output (Raw JSON or Table Result)")
    ],
    outputs="text",
    title="Explain SQL Result (Groq + LLaMA3)",
    description="Input a SQL query and its result. Get an AI-generated explanation."
)
# Tab 3: uploaded CSV/JSON dataset -> summary and five insights.
tab3 = gr.Interface(
    fn=summarize_data,
    inputs=gr.File(label="Upload CSV or JSON Dataset"),
    outputs="text",
    title="Data Summary & Insights (Groq + LLaMA3)",
    description="Upload a dataset to get a general summary and 5 AI-generated insights."
)
# Mount all three tools. Fix: tab3 (Data Summary & Insights) was built
# above but never attached to the TabbedInterface, leaving it unreachable.
demo = gr.TabbedInterface(
    [tab1, tab2, tab3],
    ["SQL Generator", "Explain Output", "Data Summary"],
)

if __name__ == '__main__':
    demo.launch()