# File size: 4,940 Bytes | 9c0e838
import json
import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr
from openai import OpenAI
# ==================================================
# OpenAI client helper (API key from user)
# ==================================================
def get_openai_client(api_key: str):
    """Build an OpenAI client authenticated with the caller-supplied key.

    Args:
        api_key: OpenAI API key provided by the user.

    Returns:
        An ``OpenAI`` client constructed with ``api_key``.
    """
    client = OpenAI(api_key=api_key)
    return client
# ==================================================
# CSV Loader + Normalizer
# ==================================================
def load_and_normalize_csv(csv_file):
    """Read a CSV into a DataFrame with canonical column names and dr/cr labels.

    Column names are stripped, lower-cased, and have spaces, slashes and
    underscores removed, so ``"Dr/Cr"`` and ``"dr_cr"`` both become ``"drcr"``.
    When a ``drcr`` column is present its values are converted to strings,
    stripped, lower-cased, and the abbreviations ``cr`` / ``dr`` / ``db`` are
    expanded to ``credit`` / ``debit``. Any other value is left unchanged.

    Args:
        csv_file: Anything ``pandas.read_csv`` accepts (path or file-like).

    Returns:
        The normalized ``pandas.DataFrame``.
    """
    df = pd.read_csv(csv_file)

    # Normalize column names. ``regex=False`` keeps the replacements literal
    # regardless of the pandas version's default for ``str.replace``.
    df.columns = (
        df.columns
        .str.strip()
        .str.lower()
        .str.replace(" ", "", regex=False)
        .str.replace("/", "", regex=False)
        .str.replace("_", "", regex=False)
    )

    # Normalize the drcr column if present.
    # NOTE: ``astype(str)`` turns missing cells into the literal string "nan".
    if "drcr" in df.columns:
        df["drcr"] = (
            df["drcr"]
            .astype(str)
            .str.strip()
            .str.lower()
            .replace({
                "cr": "credit",
                "dr": "debit",  # the usual abbreviation in a "Dr/Cr" column
                "db": "debit",
            })
        )

    return df
# ==================================================
# Ask OpenAI for INTENT (STRICT JSON)
# ==================================================
def get_intent(question: str, api_key: str) -> dict:
    """Ask the model to turn a natural-language question into a JSON intent.

    The request uses the ``gpt-4o-mini`` model in JSON-object response mode
    with a system prompt describing the expected keys (``action``,
    ``filters``, ``groupby``).

    Args:
        question: The user's question, sent verbatim as the user message.
        api_key: OpenAI API key used to build the client.

    Returns:
        The decoded JSON payload from the first completion choice.

    Raises:
        ValueError: If the completion carries no content. Content that is not
            valid JSON raises ``json.JSONDecodeError`` (a ``ValueError``
            subclass).
    """
    system_prompt = (
        "You are a data analysis planner.\n"
        "Return ONLY valid JSON.\n"
        "Do NOT explain.\n\n"
        "JSON format:\n"
        "{\n"
        ' "action": "count | sum | plot",\n'
        ' "filters": { "year": number | null, "drcr": string | null },\n'
        ' "groupby": "year | drcr | null"\n'
        "}"
    )

    client = get_openai_client(api_key)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question},
        ],
    )

    content = response.choices[0].message.content
    # Without this guard, json.loads(None) fails with an opaque TypeError.
    if content is None:
        raise ValueError("The model returned an empty response.")
    return json.loads(content)
# ==================================================
# Execute intent using real Python
# ==================================================
def execute_intent(intent: dict, df: pd.DataFrame):
    """Run a parsed intent against the DataFrame.

    Args:
        intent: Mapping with the keys ``action`` (``"count"``, ``"sum"`` or
            ``"plot"``), optional ``filters`` (column -> required value;
            ``None`` values and unknown columns are skipped) and optional
            ``groupby`` (column name).
        df: The data to query. It is not modified.

    Returns:
        ``count``: the row count, or a per-group size Series when grouped.
        ``sum``: the total of the ``amount`` column, or a per-group Series.
        ``plot``: the per-group size Series, which is also drawn as a bar
        chart.

    Raises:
        ValueError: If ``action`` is ``"plot"`` without a ``groupby``, or if
            ``action`` is not one of the supported values.
    """
    # Boolean indexing below builds new frames, so no defensive copy is needed.
    data = df

    # Apply filters. ``"filters": null`` in the JSON arrives as None, which
    # has no ``.items()``; treat it like an empty mapping.
    filters = intent.get("filters") or {}
    for column, wanted in filters.items():
        if wanted is not None and column in data.columns:
            data = data[data[column] == wanted]

    action = intent.get("action")
    group_col = intent.get("groupby")
    # The planner prompt shows null inside quotes, so the literal string
    # "null" may come back; treat it as "no grouping" unless a column really
    # has that name.
    if group_col == "null" and "null" not in data.columns:
        group_col = None

    # COUNT
    if action == "count":
        if group_col:
            return data.groupby(group_col).size()
        return len(data)

    # SUM
    if action == "sum":
        if group_col:
            return data.groupby(group_col)["amount"].sum()
        return data["amount"].sum()

    # PLOT
    if action == "plot":
        if not group_col:
            raise ValueError("Plot requires groupby")
        result = data.groupby(group_col).size()
        # Draw on a dedicated figure and always close it, so repeated requests
        # neither accumulate figures nor overlay bars on an earlier plot.
        fig, ax = plt.subplots()
        try:
            result.plot(kind="bar", ax=ax)
            ax.set_title("Result")
            fig.tight_layout()
            plt.show()
        finally:
            plt.close(fig)
        return result

    raise ValueError(f"Unknown action: {action}")
# ==================================================
# End-to-end question answering
# ==================================================
def answer_question(question: str, api_key: str, df: pd.DataFrame):
    """Answer a question end to end: plan the intent, then execute it.

    Args:
        question: Natural-language question forwarded to ``get_intent``.
        api_key: OpenAI API key forwarded to ``get_intent``.
        df: DataFrame the resulting intent is executed against.

    Returns:
        Whatever ``execute_intent`` returns for the planned intent.
    """
    return execute_intent(get_intent(question, api_key), df)
# ==================================================
# Gradio wrapper
# ==================================================
def gradio_answer(api_key, csv_file, question):
    """Validate the UI inputs, answer the question and return display text.

    Args:
        api_key: OpenAI API key typed by the user.
        csv_file: Uploaded CSV, passed through to ``load_and_normalize_csv``.
        question: Natural-language question about the CSV.

    Returns:
        A string for the answer box: a prompt for whichever input is missing,
        the formatted result, or ``"Error: ..."`` if anything raised.
    """
    # This is the UI boundary, so every failure is reported as text rather
    # than propagated to the caller.
    try:
        # Strip first so whitespace-only input counts as missing and never
        # triggers an API call.
        api_key = (api_key or "").strip()
        question = (question or "").strip()

        if not api_key:
            return "Please provide your OpenAI API key."
        if csv_file is None:
            return "Please upload a CSV file."
        if not question:
            return "Please enter a question."

        df = load_and_normalize_csv(csv_file)
        result = answer_question(question, api_key, df)

        # pandas results render as an aligned table; scalars fall back to str().
        if hasattr(result, "to_string"):
            return result.to_string()
        return str(result)
    except Exception as e:
        return f"Error: {str(e)}"
# ==================================================
# Gradio Interface (Spaces entry point)
# ==================================================
# The three inputs are passed positionally to gradio_answer in this order:
# API key, uploaded CSV, question. Its string result fills the "Answer" box.
demo = gr.Interface(
    fn=gradio_answer,
    inputs=[
        gr.Textbox(label="OpenAI API Key", type="password"),
        gr.File(label="Upload CSV File", file_types=[".csv"]),
        gr.Textbox(
            label="Ask a question about your CSV",
            placeholder="How many credit operations happened in 2022?"
        )
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Chat with your CSV 📊",
    description="Upload any CSV file and ask natural language questions about it"
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()