Spaces:

indhupamula
/

sql

Build error

sql

File size: 3,253 Bytes

3ed4fdc

import gradio as gr
import pandas as pd
import sqlite3
import plotly.express as px
import tempfile

# Optional: OpenAI + LangChain imports if using NL -> SQL
try:
    from langchain_openai import OpenAI
    from langchain_experimental.sql import SQLDatabaseChain
    from langchain.sql_database import SQLDatabase
    OPENAI_AVAILABLE = True
except:
    OPENAI_AVAILABLE = False

def process_file(file, question):
    if file is None or question.strip() == "":
        return "Upload a file and ask a question.", None, None

    # Read uploaded file
    try:
        fname = file.name.lower()
        if fname.endswith(".csv"):
            df = pd.read_csv(file.name)
        else:
            df = pd.read_excel(file.name)
    except Exception as e:
        return f"Error reading file: {e}", None, None

    # Save to temporary SQLite file
    try:
        temp_db_file = tempfile.NamedTemporaryFile(suffix=".db").name
        conn = sqlite3.connect(temp_db_file)
        df.to_sql("data", conn, if_exists="replace", index=False)
        conn.close()
    except Exception as e:
        return f"Error creating database: {e}", None, None

    # Run natural language -> SQL if OpenAI available
    result_text = "Rule-based: Showing first 5 rows"
    result_df = df.head(5)  # Default fallback

    if OPENAI_AVAILABLE:
        try:
            db = SQLDatabase.from_uri(f"sqlite:///{temp_db_file}")
            llm = OpenAI(temperature=0)
            db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=False)
            result_text = db_chain.run(question)
            # Execute the SQL safely to show result table
            conn = sqlite3.connect(temp_db_file)
            result_df = pd.read_sql_query("SELECT * FROM data LIMIT 100", conn)
            conn.close()
        except Exception as e:
            result_text = f"OpenAI NL->SQL failed, showing fallback: {e}"
            result_df = df.head(5)

    # Visualization
    fig = None
    try:
        numeric_cols = result_df.select_dtypes(include=['number']).columns.tolist()
        categorical_cols = result_df.select_dtypes(include=['object']).columns.tolist()

        if numeric_cols and categorical_cols:
            x_col = categorical_cols[0]
            y_col = numeric_cols[0]
            fig = px.bar(result_df, x=x_col, y=y_col, title=f"{y_col} vs {x_col}")
        elif numeric_cols:
            fig = px.line(result_df[numeric_cols])
        elif categorical_cols:
            fig = px.histogram(result_df, x=categorical_cols[0])
    except Exception as e:
        fig = None  # fail silently for visualization

    return result_text, result_df, fig

# Gradio Interface
file_input = gr.File(label="Upload CSV or Excel")
question_input = gr.Textbox(label="Ask a question in natural language")
output_text = gr.Textbox(label="Generated Result")
output_table = gr.Dataframe(label="Query Result")
output_plot = gr.Plot(label="Visualization")

gr.Interface(
    fn=process_file,
    inputs=[file_input, question_input],
    outputs=[output_text, output_table, output_plot],
    live=False,
    title="NL → SQL Query Generator + Visualization",
    description="Upload CSV/Excel, ask natural language questions, and get SQL results with automatic visualizations."
).launch()