File size: 2,699 Bytes
17225cb
 
 
2fba167
17225cb
 
 
215dcfb
17225cb
 
 
 
 
 
 
 
 
215dcfb
17225cb
 
215dcfb
 
17225cb
adca139
17225cb
adca139
215dcfb
 
f173eae
 
807b1cf
 
 
 
 
f173eae
 
 
215dcfb
807b1cf
adca139
807b1cf
17225cb
 
f173eae
17225cb
 
 
 
 
 
 
 
 
 
215dcfb
17225cb
cfc4c6b
17225cb
 
 
 
 
 
af708e4
 
2c600a0
17225cb
 
 
 
 
 
 
 
 
 
 
 
adca139
17225cb
 
f173eae
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import gradio as gr
import pandas as pd
import duckdb
import requests
import re
import os

# βœ… Securely load Together API Key
def get_together_api_key():
    key = os.environ.get("TOGETHER_API_KEY")
    if key:
        print("βœ… TOGETHER_API_KEY loaded successfully.")
        return key
    raise RuntimeError("❌ TOGETHER_API_KEY not found. Set it in Hugging Face > Settings > Secrets.")

TOGETHER_API_KEY = get_together_api_key()

# 🧠 Generate SQL from prompt using Together's /inference endpoint
def generate_sql_from_prompt(prompt, df):
    schema = ", ".join([f"{col} ({str(dtype)})" for col, dtype in df.dtypes.items()])
    full_prompt = f"""
You are a SQL expert. Here is a table called 'df' with the following schema:
{schema}

User question: "{prompt}"

Write a valid SQL query using the 'df' table. Return only the SQL code.
"""

    url = "https://api.together.xyz/inference"
    headers = {
        "Authorization": f"Bearer {TOGETHER_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "meta-llama/Llama-3-8B-Instruct",
        "prompt": full_prompt,
        "max_tokens": 300,
        "temperature": 0.7,
    }

    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()
    result = response.json()
    return result['output'].strip("```sql").strip("```").strip()

# 🧽 Clean SQL for DuckDB compatibility
def clean_sql_for_duckdb(sql, df_columns):
    sql = sql.replace("`", '"')
    for col in df_columns:
        if " " in col and f'"{col}"' not in sql:
            pattern = r'\b' + re.escape(col) + r'\b'
            sql = re.sub(pattern, f'"{col}"', sql)
    return sql

# πŸ’¬ Main chatbot function
def chatbot_interface(file, question):
    try:
        df = pd.read_excel(file)
        sql = generate_sql_from_prompt(question, df)
        cleaned_sql = clean_sql_for_duckdb(sql, df.columns)
        result = duckdb.query(cleaned_sql).to_df()
        return f"πŸ“œ SQL Query:\n```sql\n{sql}\n```", result
    except Exception as e:
        return f"❌ Error: {str(e)}", pd.DataFrame()



# πŸŽ›οΈ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“Š Excel SQL Chatbot with Together API")
    with gr.Row():
        file_input = gr.File(label="πŸ“‚ Upload Excel File (.xlsx)")
        question = gr.Textbox(label="🧠 Ask a question", placeholder="e.g., Show average salary by department")
    submit = gr.Button("πŸš€ Generate & Query")
    sql_output = gr.Markdown()
    result_table = gr.Dataframe()

    submit.click(fn=chatbot_interface, inputs=[file_input, question], outputs=[sql_output, result_table])

# πŸš€ Launch the app
if __name__ == "__main__":
    demo.launch()