Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

PD03 committed on Jun 26, 2025

Commit

e784f1e

verified ·

1 Parent(s): d34c686

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -46

app.py CHANGED Viewed

@@ -1,62 +1,85 @@
 import gradio as gr
 import pandas as pd
-import torch
 import duckdb
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
-# Load data into DuckDB
 df = pd.read_csv('synthetic_profit.csv')
-con = duckdb.connect(':memory:')
-con.register('sap', df)
-# One-line schema for prompts
-schema = ", ".join(df.columns)
-# Load TAPEX for SQL generation
-MODEL_ID = "microsoft/tapex-base-finetuned-wikisql"
-device = 0 if torch.cuda.is_available() else -1
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model     = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
-sql_gen = pipeline(
-    "text2text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    framework="pt",
-    device=device,
-    max_length=128,
-)
-def answer_profitability(question: str) -> str:
-    # 1) Generate SQL
-    prompt = (
-        f"-- Translate to SQL for table `sap` ({schema})\n"
-        f"Question: {question}\n"
-        "SQL:"
     )
-    sql = sql_gen(prompt)[0]['generated_text'].strip()
-    # 2) Try to execute it
     try:
-        df_out = con.execute(sql).df()
     except Exception as e:
-        # Use a normal f-string with explicit \n for newlines
         return (
-            f"❌ **SQL Error**\n"
             f"```\n{e}\n```\n\n"
-            f"**Generated SQL**\n"
-            f"```sql\n{sql}\n```"
-        )
-    # 3) Format successful result
-    if df_out.empty:
-        return (
-            "No rows returned.\n\n"
             f"**Generated SQL**\n```sql\n{sql}\n```"
         )
-    if df_out.shape == (1,1):
-        return str(df_out.iat[0,0])
-    return df_out.to_markdown(index=False)

+import os
 import gradio as gr
 import pandas as pd
 import duckdb
+import openai
+# ─── 1) Set your OpenAI key via the SECRET: OPENAI_API_KEY ───────────────────
+openai.api_key = os.getenv("OPENAI_API_KEY")
+# ─── 2) Load your synthetic data into DuckDB ─────────────────────────────────
 df = pd.read_csv('synthetic_profit.csv')
+conn = duckdb.connect(':memory:')
+conn.register('sap', df)
+# ─── 3) One-line schema description for prompting ─────────────────────────────
+schema = ", ".join(df.columns)
+# e.g. "Region,Product,FiscalYear,FiscalQuarter,Revenue,Profit,ProfitMargin"
+# ─── 4) Function to call OpenAI and generate SQL ──────────────────────────────
+def generate_sql(question: str) -> str:
+    """Ask the OpenAI chat model to translate a question into SQL for `sap`.
+
+    Args:
+        question: Natural-language question typed by the user.
+
+    Returns:
+        The model's reply with surrounding whitespace removed and, when the
+        reply is wrapped in a Markdown code fence, only the fenced content.
+    """
+    system = (
+        f"You are an expert SQL generator for a DuckDB table named `sap` "
+        f"with columns: {schema}. "
+        "Generate a valid SQL query that returns exactly what the user is asking. "
+        "Only return the SQL query, without any explanation."
+    )
+    messages = [
+        {"role": "system",  "content": system},
+        {"role": "user",    "content": question}
+    ]
+    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 SDK interface;
+    # confirm the openai version pinned for this Space still provides it.
+    resp = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        temperature=0.0,
+        max_tokens=150,
     )
+    sql = resp.choices[0].message.content.strip()
+    # Unwrap a Markdown code fence if present. Take the text between the
+    # opening fence and the FIRST closing fence so that single-line replies
+    # ("```SELECT 1```") and replies with trailing commentary after the fence
+    # are both handled, instead of blindly dropping the first and last lines.
+    if sql.startswith("```"):
+        fenced = sql[3:]
+        close = fenced.find("```")
+        if close != -1:
+            fenced = fenced[:close]
+            first_line, newline, remainder = fenced.partition("\n")
+            tag = first_line.strip()
+            # Drop the opening-fence line only when it is empty or a bare
+            # language tag such as "sql"; otherwise it is already SQL text.
+            if newline and (not tag or tag.isalnum()):
+                fenced = remainder
+            sql = fenced.strip()
+    return sql
+# ─── 5) Core QA function: NL → SQL → execute → format result ─────────────────
+def answer_profitability(question: str) -> str:
+    """Answer a natural-language question about the `sap` table.
+
+    Generates SQL with `generate_sql`, executes it on the module-level DuckDB
+    connection, and formats the outcome as Markdown.
+
+    Args:
+        question: Natural-language question typed by the user.
+
+    Returns:
+        A Markdown string: a prompt for input when the question is blank, an
+        error report when SQL generation or execution fails, a "no rows"
+        notice, the bare value for a 1x1 result, or a Markdown table.
+    """
+    # Blank input: nothing to translate, so skip the API call entirely.
+    if not question or not question.strip():
+        return "Please enter a question."
+    # 5a) Generate SQL (API/network failures are reported, not raised)
+    try:
+        sql = generate_sql(question)
+    except Exception as e:
+        return f"❌ **Error generating SQL**\n\n```\n{e}\n```"
+    # 5b) Try to run it
+    # NOTE(review): the SQL is model-generated from user text and is executed
+    # as-is; consider restricting what this connection is allowed to do.
     try:
+        out_df = conn.execute(sql).df()
     except Exception as e:
         return (
+            f"❌ **Error executing SQL**\n\n"
             f"```\n{e}\n```\n\n"
             f"**Generated SQL**\n```sql\n{sql}\n```"
         )
+    # 5c) Format the successful result
+    if out_df.empty:
+        return f"No rows returned.\n\n**SQL**\n```sql\n{sql}\n```"
+    # Single‐cell result → scalar
+    if out_df.shape == (1,1):
+        return str(out_df.iat[0,0])
+    # Otherwise → markdown table
+    return out_df.to_markdown(index=False)
+# ─── 6) Gradio UI ─────────────────────────────────────────────────────────────
+iface = gr.Interface(
+    fn=answer_profitability,
+    inputs=gr.Textbox(lines=2, placeholder="Ask a question about profitability…"),
+    outputs=gr.Markdown(),
+    title="SAP Profitability Q&A (OpenAI → SQL → DuckDB)",
+    description=(
+        "Uses OpenAI’s GPT-3.5-Turbo to translate your question into SQL, "
+        "executes it on the `sap` table in DuckDB, and returns the result."
+    ),
+    allow_flagging="never",
+)
+if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)