# VitoVikram's picture
# Create app.py
# 9c0e838 verified
import json
import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr
from openai import OpenAI
# ==================================================
# OpenAI client helper (API key from user)
# ==================================================
def get_openai_client(api_key: str):
    """Build an OpenAI client authenticated with the caller-supplied key.

    Args:
        api_key: The OpenAI API key provided by the user.

    Returns:
        An ``OpenAI`` client instance constructed with ``api_key``.
    """
    client = OpenAI(api_key=api_key)
    return client
# ==================================================
# CSV Loader + Normalizer
# ==================================================
def load_and_normalize_csv(csv_file):
    """Read a CSV into a DataFrame with normalized column names and dr/cr values.

    Column names are stripped, lower-cased, and have spaces, slashes and
    underscores removed, so ``"Dr/Cr"``, ``"dr_cr"`` and ``" DR CR "`` all
    become ``"drcr"``.  If a ``drcr`` column exists, its values are turned
    into stripped lower-case strings and the abbreviations ``cr``, ``dr``
    and ``db`` are expanded to ``credit`` / ``debit``.

    Args:
        csv_file: Anything ``pandas.read_csv`` accepts (a path or a
            file-like object).

    Returns:
        The loaded ``pandas.DataFrame`` with normalized columns.
    """
    df = pd.read_csv(csv_file)

    # Normalize column names.  ``regex=False`` keeps these replacements
    # literal regardless of the installed pandas version's default.
    columns = df.columns.str.strip().str.lower()
    for char in (" ", "/", "_"):
        columns = columns.str.replace(char, "", regex=False)
    df.columns = columns

    # Normalize the debit/credit marker column if present.
    if "drcr" in df.columns:
        abbreviations = {
            "cr": "credit",
            "dr": "debit",  # the abbreviation the column name itself uses
            "db": "debit",
        }
        df["drcr"] = (
            df["drcr"]
            .astype(str)  # NOTE: missing values become the string "nan"
            .str.strip()
            .str.lower()
            .replace(abbreviations)
        )

    return df
# ==================================================
# Ask OpenAI for INTENT (STRICT JSON)
# ==================================================
def get_intent(question: str, api_key: str) -> dict:
    """Ask the OpenAI chat API to translate a question into a JSON intent.

    The system prompt instructs the model to reply with a JSON object that
    has an ``action``, a ``filters`` object and a ``groupby`` field; the
    request also enables the API's JSON-object response format.

    Args:
        question: The user's natural-language question.
        api_key: OpenAI API key used to build the client.

    Returns:
        The parsed JSON object as a dict.

    Raises:
        ValueError: If the model returns no content, content that is not
            valid JSON (``json.JSONDecodeError`` is a ``ValueError``), or
            JSON that is not an object.
    """
    system_prompt = (
        "You are a data analysis planner.\n"
        "Return ONLY valid JSON.\n"
        "Do NOT explain.\n\n"
        "JSON format:\n"
        "{\n"
        ' "action": "count | sum | plot",\n'
        ' "filters": { "year": number | null, "drcr": string | null },\n'
        ' "groupby": "year | drcr | null"\n'
        "}"
    )

    client = get_openai_client(api_key)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question},
        ],
    )

    # ``content`` is optional in the API response; passing None to
    # json.loads would raise an opaque TypeError, so fail with a clear message.
    content = response.choices[0].message.content
    if not content:
        raise ValueError("The model returned an empty response.")

    intent = json.loads(content)
    if not isinstance(intent, dict):
        raise ValueError("The model did not return a JSON object.")
    return intent
# ==================================================
# Execute intent using real Python
# ==================================================
def execute_intent(intent: dict, df: pd.DataFrame):
    """Run a parsed intent against the DataFrame using pandas.

    Args:
        intent: Mapping with an ``action`` (``"count"``, ``"sum"`` or
            ``"plot"``), optional ``filters`` (column name -> required
            value; ``None`` values and unknown columns are skipped) and an
            optional ``groupby`` column name.
        df: The data to query.  It is not modified.

    Returns:
        * ``count``: the number of matching rows, or a Series of group
          sizes when ``groupby`` is set.
        * ``sum``: the total of the ``amount`` column, or per-group totals
          when ``groupby`` is set.
        * ``plot``: the Series of group sizes that was drawn as a bar chart.

    Raises:
        ValueError: If ``action`` is not recognized, or if ``plot`` is
            requested without ``groupby``.
    """
    # No defensive copy needed: boolean indexing below builds new frames and
    # nothing here mutates ``df``.
    data = df

    # ``filters`` may be absent or an explicit JSON null; treat both as "none".
    filters = intent.get("filters") or {}
    for column, wanted in filters.items():
        if wanted is not None and column in data.columns:
            data = data[data[column] == wanted]

    action = intent.get("action")
    group_col = intent.get("groupby")

    # COUNT
    if action == "count":
        if group_col:
            return data.groupby(group_col).size()
        return len(data)

    # SUM
    if action == "sum":
        if group_col:
            return data.groupby(group_col)["amount"].sum()
        return data["amount"].sum()

    # PLOT
    if action == "plot":
        if not group_col:
            raise ValueError("Plot requires groupby")
        result = data.groupby(group_col).size()
        # Draw on a dedicated figure and always close it: relying on
        # pyplot's implicit current figure leaks figures in a long-running
        # process and lets a later plot land on an earlier plot's axes.
        fig, ax = plt.subplots()
        try:
            result.plot(kind="bar", ax=ax)
            ax.set_title("Result")
            fig.tight_layout()
            plt.show()
        finally:
            plt.close(fig)
        return result

    raise ValueError(f"Unknown action: {action}")
# ==================================================
# End-to-end question answering
# ==================================================
def answer_question(question: str, api_key: str, df: pd.DataFrame):
    """Answer a natural-language question about ``df``.

    The question is first converted to an intent via ``get_intent`` and
    that intent is then evaluated on ``df`` by ``execute_intent``.

    Args:
        question: The user's question.
        api_key: OpenAI API key forwarded to ``get_intent``.
        df: The DataFrame to query.

    Returns:
        Whatever ``execute_intent`` returns for the derived intent.
    """
    return execute_intent(get_intent(question, api_key), df)
# ==================================================
# Gradio wrapper
# ==================================================
def gradio_answer(api_key, csv_file, question):
    """Gradio callback: validate the inputs and return the answer as text.

    Args:
        api_key: OpenAI API key typed by the user.
        csv_file: The uploaded CSV as provided by the file component, or
            ``None`` when nothing was uploaded.
        question: The user's question.

    Returns:
        A string: a prompt for whichever input is missing, the answer
        (``to_string()`` output when the result offers it, otherwise
        ``str(result)``), or ``"Error: ..."`` if anything raised.
    """
    # This is the UI boundary: every failure is reported as text instead of
    # propagating into Gradio.
    try:
        # Pasted keys and questions often carry stray whitespace; strip it so
        # blank-looking input is rejected here and the key is sent clean.
        api_key = (api_key or "").strip()
        question = (question or "").strip()

        if not api_key:
            return "Please provide your OpenAI API key."
        if csv_file is None:
            return "Please upload a CSV file."
        if not question:
            return "Please enter a question."

        df = load_and_normalize_csv(csv_file)
        result = answer_question(question, api_key, df)

        # pandas objects render more readably through to_string().
        if hasattr(result, "to_string"):
            return result.to_string()
        return str(result)
    except Exception as e:
        return f"Error: {e}"
# ==================================================
# Gradio Interface (Spaces entry point)
# ==================================================
# Single-function Gradio UI: three inputs (API key, CSV upload, question)
# feed gradio_answer, whose text result is shown in one output textbox.
demo = gr.Interface(
    title="Chat with your CSV 📊",
    description="Upload any CSV file and ask natural language questions about it",
    fn=gradio_answer,
    inputs=[
        # Rendered as a password field so the key is not shown on screen.
        gr.Textbox(type="password", label="OpenAI API Key"),
        gr.File(file_types=[".csv"], label="Upload CSV File"),
        gr.Textbox(
            placeholder="How many credit operations happened in 2022?",
            label="Ask a question about your CSV",
        ),
    ],
    outputs=gr.Textbox(label="Answer"),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()