import gradio as gr import pandas as pd import sqlite3 import plotly.express as px import tempfile # Optional: OpenAI + LangChain imports if using NL -> SQL try: from langchain_openai import OpenAI from langchain_experimental.sql import SQLDatabaseChain from langchain.sql_database import SQLDatabase OPENAI_AVAILABLE = True except: OPENAI_AVAILABLE = False def process_file(file, question): if file is None or question.strip() == "": return "Upload a file and ask a question.", None, None # Read uploaded file try: fname = file.name.lower() if fname.endswith(".csv"): df = pd.read_csv(file.name) else: df = pd.read_excel(file.name) except Exception as e: return f"Error reading file: {e}", None, None # Save to temporary SQLite file try: temp_db_file = tempfile.NamedTemporaryFile(suffix=".db").name conn = sqlite3.connect(temp_db_file) df.to_sql("data", conn, if_exists="replace", index=False) conn.close() except Exception as e: return f"Error creating database: {e}", None, None # Run natural language -> SQL if OpenAI available result_text = "Rule-based: Showing first 5 rows" result_df = df.head(5) # Default fallback if OPENAI_AVAILABLE: try: db = SQLDatabase.from_uri(f"sqlite:///{temp_db_file}") llm = OpenAI(temperature=0) db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=False) result_text = db_chain.run(question) # Execute the SQL safely to show result table conn = sqlite3.connect(temp_db_file) result_df = pd.read_sql_query("SELECT * FROM data LIMIT 100", conn) conn.close() except Exception as e: result_text = f"OpenAI NL->SQL failed, showing fallback: {e}" result_df = df.head(5) # Visualization fig = None try: numeric_cols = result_df.select_dtypes(include=['number']).columns.tolist() categorical_cols = result_df.select_dtypes(include=['object']).columns.tolist() if numeric_cols and categorical_cols: x_col = categorical_cols[0] y_col = numeric_cols[0] fig = px.bar(result_df, x=x_col, y=y_col, title=f"{y_col} vs {x_col}") elif numeric_cols: fig = px.line(result_df[numeric_cols]) elif categorical_cols: fig = px.histogram(result_df, x=categorical_cols[0]) except Exception as e: fig = None # fail silently for visualization return result_text, result_df, fig # Gradio Interface file_input = gr.File(label="Upload CSV or Excel") question_input = gr.Textbox(label="Ask a question in natural language") output_text = gr.Textbox(label="Generated Result") output_table = gr.Dataframe(label="Query Result") output_plot = gr.Plot(label="Visualization") gr.Interface( fn=process_file, inputs=[file_input, question_input], outputs=[output_text, output_table, output_plot], live=False, title="NL → SQL Query Generator + Visualization", description="Upload CSV/Excel, ask natural language questions, and get SQL results with automatic visualizations." ).launch()