import json import pandas as pd import matplotlib.pyplot as plt import gradio as gr from openai import OpenAI # ================================================== # OpenAI client helper (API key from user) # ================================================== def get_openai_client(api_key: str): return OpenAI(api_key=api_key) # ================================================== # CSV Loader + Normalizer # ================================================== def load_and_normalize_csv(csv_file): df = pd.read_csv(csv_file) # Normalize column names df.columns = ( df.columns .str.strip() .str.lower() .str.replace(" ", "") .str.replace("/", "") .str.replace("_", "") ) # Normalize drcr column if present if "drcr" in df.columns: df["drcr"] = ( df["drcr"] .astype(str) .str.strip() .str.lower() .replace({ "cr": "credit", "db": "debit" }) ) return df # ================================================== # Ask OpenAI for INTENT (STRICT JSON) # ================================================== def get_intent(question: str, api_key: str) -> dict: client = get_openai_client(api_key) response = client.chat.completions.create( model="gpt-4o-mini", response_format={"type": "json_object"}, messages=[ { "role": "system", "content": ( "You are a data analysis planner.\n" "Return ONLY valid JSON.\n" "Do NOT explain.\n\n" "JSON format:\n" "{\n" ' "action": "count | sum | plot",\n' ' "filters": { "year": number | null, "drcr": string | null },\n' ' "groupby": "year | drcr | null"\n' "}" ) }, { "role": "user", "content": question } ] ) return json.loads(response.choices[0].message.content) # ================================================== # Execute intent using real Python # ================================================== def execute_intent(intent: dict, df: pd.DataFrame): data = df.copy() # Apply filters filters = intent.get("filters", {}) for key, value in filters.items(): if value is not None and key in data.columns: data = data[data[key] == value] action = intent.get("action") group_col = intent.get("groupby") # COUNT if action == "count": if group_col: return data.groupby(group_col).size() return len(data) # SUM if action == "sum": if group_col: return data.groupby(group_col)["amount"].sum() return data["amount"].sum() # PLOT if action == "plot": if not group_col: raise ValueError("Plot requires groupby") result = data.groupby(group_col).size() result.plot(kind="bar") plt.title("Result") plt.tight_layout() plt.show() return result raise ValueError(f"Unknown action: {action}") # ================================================== # End-to-end question answering # ================================================== def answer_question(question: str, api_key: str, df: pd.DataFrame): intent = get_intent(question, api_key) return execute_intent(intent, df) # ================================================== # Gradio wrapper # ================================================== def gradio_answer(api_key, csv_file, question): try: if not api_key: return "Please provide your OpenAI API key." if csv_file is None: return "Please upload a CSV file." if not question: return "Please enter a question." df = load_and_normalize_csv(csv_file) result = answer_question(question, api_key, df) if hasattr(result, "to_string"): return result.to_string() return str(result) except Exception as e: return f"Error: {str(e)}" # ================================================== # Gradio Interface (Spaces entry point) # ================================================== demo = gr.Interface( fn=gradio_answer, inputs=[ gr.Textbox(label="OpenAI API Key", type="password"), gr.File(label="Upload CSV File", file_types=[".csv"]), gr.Textbox( label="Ask a question about your CSV", placeholder="How many credit operations happened in 2022?" ) ], outputs=gr.Textbox(label="Answer"), title="Chat with your CSV 📊", description="Upload any CSV file and ask natural language questions about it" ) if __name__ == "__main__": demo.launch()