File size: 2,494 Bytes
adca139
807b1cf
 
 
 
 
 
adca139
a824eae
 
 
 
 
adca139
a824eae
807b1cf
 
 
 
 
adca139
807b1cf
adca139
807b1cf
 
adca139
807b1cf
 
 
 
 
 
 
 
 
 
 
adca139
807b1cf
 
 
 
adca139
a824eae
807b1cf
 
 
 
 
 
 
adca139
a824eae
807b1cf
 
 
 
 
 
 
 
 
adca139
a824eae
807b1cf
 
 
a824eae
807b1cf
 
 
 
adca139
a824eae
adca139
07f734b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
import pandas as pd
import duckdb
import requests
import re
import io
import os

# βœ… Read API Key
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")

if not TOGETHER_API_KEY:
    raise RuntimeError("❌ TOGETHER_API_KEY not found. Please set it in Hugging Face Secrets tab.")

# πŸ” SQL generation
def generate_sql_from_prompt(prompt, df):
    schema = ", ".join([f"{col} ({str(dtype)})" for col, dtype in df.dtypes.items()])
    full_prompt = f"""
You are a SQL expert. Here is a table called 'df' with the following schema:
{schema}

User question: "{prompt}"

Write a valid SQL query using the 'df' table. Return only the SQL code.
"""

    url = "https://api.together.xyz/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {TOGETHER_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "messages": [{"role": "user", "content": full_prompt}],
        "temperature": 0.2,
        "max_tokens": 200
    }

    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()
    result = response.json()
    return result['choices'][0]['message']['content'].strip("```sql").strip("```").strip()

# 🧽 SQL cleanup
def clean_sql_for_duckdb(sql, df_columns):
    sql = sql.replace("`", '"')
    for col in df_columns:
        if " " in col and f'"{col}"' not in sql:
            pattern = r'\b' + re.escape(col) + r'\b'
            sql = re.sub(pattern, f'"{col}"', sql)
    return sql

# πŸ”„ Main Gradio function
def chatbot_interface(file, question):
    try:
        df = pd.read_excel(file)
        sql = generate_sql_from_prompt(question, df)
        cleaned_sql = clean_sql_for_duckdb(sql, df.columns)
        result = duckdb.query(cleaned_sql).to_df()
        return f"πŸ“œ SQL Query:\n```sql\n{sql}\n```", result
    except Exception as e:
        return f"❌ Error: {str(e)}", pd.DataFrame()

# 🧱 Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“Š Excel SQL Chatbot with Together API")
    file_input = gr.File(label="πŸ“‚ Upload Excel File (.xlsx)")
    question = gr.Textbox(label="🧠 Ask a question about your data")
    submit = gr.Button("πŸš€ Generate & Query")
    sql_output = gr.Markdown()
    result_table = gr.Dataframe()
    submit.click(fn=chatbot_interface, inputs=[file_input, question], outputs=[sql_output, result_table])

# ▢️ Run the app
if __name__ == "__main__":
    demo.launch()