VitoVikram committed on
Commit
9c0e838
·
verified ·
1 Parent(s): 8cbc047

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -0
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import gradio as gr
5
+ from openai import OpenAI
6
+
7
+ # ==================================================
8
+ # OpenAI client helper (API key from user)
9
+ # ==================================================
10
def get_openai_client(api_key: str):
    """Build an OpenAI client authenticated with the caller-supplied key.

    Args:
        api_key: Key handed straight to the ``OpenAI`` constructor.

    Returns:
        A newly constructed ``OpenAI`` client.
    """
    client = OpenAI(api_key=api_key)
    return client
12
+
13
+ # ==================================================
14
+ # CSV Loader + Normalizer
15
+ # ==================================================
16
def load_and_normalize_csv(csv_file):
    """Read a CSV into a DataFrame with canonical column names.

    Column names are stripped, lower-cased and have spaces, slashes and
    underscores removed, so ``"Dr/Cr"``, ``"dr_cr"`` and ``" DR CR "`` all
    become ``"drcr"``. When a ``drcr`` column is present its values are
    converted to strings, stripped, lower-cased, and the abbreviations
    ``cr`` / ``dr`` / ``db`` are expanded to ``credit`` / ``debit``.

    Args:
        csv_file: Anything ``pandas.read_csv`` accepts (path or file-like).

    Returns:
        The normalized ``pandas.DataFrame``.
    """
    df = pd.read_csv(csv_file)

    # One character-class pass removes every separator we want to ignore.
    df.columns = (
        df.columns
        .str.strip()
        .str.lower()
        .str.replace(r"[ /_]", "", regex=True)
    )

    if "drcr" in df.columns:
        df["drcr"] = (
            df["drcr"]
            .astype(str)
            .str.strip()
            .str.lower()
            # Series.replace with a dict swaps whole values only, so longer
            # words that merely contain "cr"/"dr" are left untouched.
            # "dr" is the abbreviation the column name itself implies; "db"
            # is kept for files that use that spelling.
            .replace({
                "cr": "credit",
                "dr": "debit",
                "db": "debit",
            })
        )

    return df
43
+
44
+ # ==================================================
45
+ # Ask OpenAI for INTENT (STRICT JSON)
46
+ # ==================================================
47
def get_intent(question: str, api_key: str) -> dict:
    """Ask the chat model to turn a question into a structured intent.

    The question is sent to ``gpt-4o-mini`` together with a system prompt
    describing the expected JSON shape, using the ``json_object`` response
    format. The content of the first returned choice is parsed as JSON.

    Args:
        question: The user's natural-language question.
        api_key: OpenAI API key used to build the client.

    Returns:
        The decoded JSON reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    system_prompt = (
        "You are a data analysis planner.\n"
        "Return ONLY valid JSON.\n"
        "Do NOT explain.\n\n"
        "JSON format:\n"
        "{\n"
        ' "action": "count | sum | plot",\n'
        ' "filters": { "year": number | null, "drcr": string | null },\n'
        ' "groupby": "year | drcr | null"\n'
        "}"
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]

    completion = get_openai_client(api_key).chat.completions.create(
        model="gpt-4o-mini",
        response_format={"type": "json_object"},
        messages=conversation,
    )

    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)
76
+
77
+ # ==================================================
78
+ # Execute intent using real Python
79
+ # ==================================================
80
def execute_intent(intent: dict, df: pd.DataFrame):
    """Run a parsed intent against *df* and return the computed result.

    Args:
        intent: Mapping with the keys ``"action"`` (``"count"``, ``"sum"``
            or ``"plot"``), ``"filters"`` (column name -> required value)
            and ``"groupby"`` (column name or ``None``). ``"filters"`` may
            be missing or ``None``; filter entries whose value is ``None``
            or whose column does not exist in *df* are ignored.
        df: Source data. It is only read, never modified.

    Returns:
        * ``count``: the number of matching rows, or a Series of group
          sizes when ``groupby`` is set.
        * ``sum``: the total of the ``amount`` column, or a Series of
          per-group totals when ``groupby`` is set.
        * ``plot``: the Series of group sizes that was drawn as a bar chart.

    Raises:
        ValueError: If the action is unknown, or ``plot`` has no ``groupby``.
        KeyError: If ``groupby`` or ``amount`` names a missing column.
    """
    # Boolean indexing below always builds a new frame, so no copy is needed.
    data = df

    # The model may send `"filters": null`; treat that like "no filters".
    filters = intent.get("filters") or {}
    for column, wanted in filters.items():
        if wanted is not None and column in data.columns:
            data = data[data[column] == wanted]

    action = intent.get("action")
    group_col = intent.get("groupby")

    # The prompt spells the "no grouping" option inside a quoted string, so
    # the model can answer with the text "null" instead of a JSON null.
    # Only reinterpret it when no column actually carries that name.
    if (
        isinstance(group_col, str)
        and group_col.strip().lower() in {"", "null", "none"}
        and group_col not in data.columns
    ):
        group_col = None

    # COUNT
    if action == "count":
        if group_col:
            return data.groupby(group_col).size()
        return len(data)

    # SUM
    if action == "sum":
        if group_col:
            return data.groupby(group_col)["amount"].sum()
        return data["amount"].sum()

    # PLOT
    if action == "plot":
        if not group_col:
            raise ValueError("Plot requires groupby")

        result = data.groupby(group_col).size()

        # Draw on a dedicated figure and always close it: relying on
        # pyplot's implicit current figure would leak one figure per call
        # and could draw on top of a previous chart.
        fig, ax = plt.subplots()
        try:
            result.plot(kind="bar", ax=ax)
            ax.set_title("Result")
            fig.tight_layout()
            plt.show()
        finally:
            plt.close(fig)
        return result

    raise ValueError(f"Unknown action: {action}")
117
+
118
+ # ==================================================
119
+ # End-to-end question answering
120
+ # ==================================================
121
def answer_question(question: str, api_key: str, df: pd.DataFrame):
    """Answer a natural-language question about *df*.

    The question is first converted to an intent by ``get_intent`` and that
    intent is then evaluated on the data by ``execute_intent``.

    Args:
        question: The user's question.
        api_key: OpenAI API key forwarded to ``get_intent``.
        df: Data the intent is executed against.

    Returns:
        Whatever ``execute_intent`` returns for the derived intent.
    """
    return execute_intent(get_intent(question, api_key), df)
124
+
125
+ # ==================================================
126
+ # Gradio wrapper
127
+ # ==================================================
128
def gradio_answer(api_key, csv_file, question):
    """Gradio callback: validate the inputs and return the answer as text.

    Args:
        api_key: OpenAI API key typed by the user.
        csv_file: Uploaded CSV as provided by the file component, or
            ``None`` when nothing was uploaded.
        question: The user's question.

    Returns:
        A prompt asking for the first missing input, the answer rendered as
        text, or ``"Error: ..."`` when anything fails while answering.
    """
    try:
        # Pasted keys and questions often carry stray spaces or newlines;
        # strip them so whitespace-only input counts as missing and the key
        # is forwarded clean.
        api_key = (api_key or "").strip()
        question = (question or "").strip()

        if not api_key:
            return "Please provide your OpenAI API key."

        if csv_file is None:
            return "Please upload a CSV file."

        if not question:
            return "Please enter a question."

        df = load_and_normalize_csv(csv_file)
        result = answer_question(question, api_key, df)

        # pandas objects render more readably through to_string().
        if hasattr(result, "to_string"):
            return result.to_string()

        return str(result)

    except Exception as e:  # UI boundary: report the failure as text
        return f"Error: {e}"
149
+
150
+ # ==================================================
151
+ # Gradio Interface (Spaces entry point)
152
+ # ==================================================
153
def _build_interface():
    """Assemble the Gradio interface that fronts ``gradio_answer``."""
    key_box = gr.Textbox(label="OpenAI API Key", type="password")
    csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
    question_box = gr.Textbox(
        label="Ask a question about your CSV",
        placeholder="How many credit operations happened in 2022?",
    )
    answer_box = gr.Textbox(label="Answer")

    return gr.Interface(
        fn=gradio_answer,
        inputs=[key_box, csv_upload, question_box],
        outputs=answer_box,
        title="Chat with your CSV 📊",
        description="Upload any CSV file and ask natural language questions about it",
    )


# Module-level handle so the app object is importable as ``demo``.
demo = _build_interface()

if __name__ == "__main__":
    demo.launch()