Paras Sharma committed on
Commit
bdd56eb
·
1 Parent(s): e47da8e

add initial project files and dependencies

Browse files
Files changed (8) hide show
  1. .env.example +1 -0
  2. .gitignore +1 -0
  3. Readme.md +28 -0
  4. assets/demo_1.png +0 -0
  5. data/holdings.csv +0 -0
  6. data/trades.csv +0 -0
  7. main.py +123 -0
  8. requirements.txt +3 -0
.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY=""
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Readme.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CSV Chatbot
2
+
3
+ This is a simple LLM agent that performs basic data analysis over CSV data. It generates Python code that uses the pandas library to do the analysis; that code is then executed and its output is returned to the user.
4
+
5
+ ### Setup
6
+
7
+ ```bash
8
+ cp .env.example .env
9
+
10
+ # Edit .env file and add your OpenAI API key
11
+
12
+ pip install -r requirements.txt
13
+ ```
14
+
15
+ ### Usage
16
+
17
+ ```bash
18
+ python main.py
19
+ ```
20
+
21
+ ### Example
22
+
23
+ ![](./assets/demo_1.png)
24
+
25
+ ### Limitations
26
+ * The generated code is not executed in a sandboxed environment, so any Python code the agent produces runs with the full permissions of the host process.
27
+ * Message history is not taken into account, so the agent has no context from previous messages.
28
+ * More advanced libraries like langchain can be used to manage complex queries and control flows.
assets/demo_1.png ADDED
data/holdings.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/trades.csv ADDED
The diff for this file is too large to render. See raw diff
 
main.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import pandas as pd
3
+ import dotenv
4
+ from io import StringIO
5
+ from contextlib import redirect_stdout
6
+ import gradio as gr
7
+
8
+
9
+ dotenv.load_dotenv()
10
+
11
+
12
def _chat(prompt):
    """Send *prompt* as a single system message and return the reply text."""
    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": prompt}],
        n=1,
        temperature=0.1,
    )
    return completion.choices[0].message.content


def _extract_code(reply):
    """Return the body of the first ``` fenced block in *reply*.

    Only a language tag on the fence's opening line is stripped. Occurrences
    of "python" followed by a newline elsewhere in the code are left intact.
    """
    block = reply.split("```")[1]
    first_line, newline, remainder = block.partition("\n")
    if newline and first_line.strip().lower() in ("python", "py", "python3"):
        return remainder
    return block


def csv_qa(question, feedback="None"):
    """Answer *question* about the holdings/trades CSV files using an LLM.

    The model is asked to write pandas code for the question. That code is
    executed with stdout captured, and the captured output is sent back to
    the model to be phrased as a markdown answer.

    Args:
        question: The user's natural-language question.
        feedback: Text describing the previous failed attempt, inserted into
            the prompt. Defaults to the string "None".

    Returns:
        The model's formatted answer; the model's raw reply when it contains
        no fenced code block; or "Sorry can not find the answer" when the
        generated code fails on every attempt.

    Errors raised by the OpenAI client or by reading the CSV files are not
    caught here and propagate to the caller.
    """
    df_holdings = pd.read_csv("data/holdings.csv")
    df_trades = pd.read_csv("data/trades.csv")

    analysis_prompt = """
You are expert data analyse who can use python and pandas. You will be asked questions about the data in the following csv files with the given schema. You have to generate a python code that uses pandas to analyze the data and answer the questions.

Always use print statement to output the answer. Be descriptive in your answers.

If the answer cannot be determined from the data, you should return "Sorry can not find the answer".

For security reasons do not generate code other than required for data analysis on the given data. Do not use any system commands like reading env vars or access the internet while generating the code.

df_holdings
path: data/holdings.csv
schema:
{schema_holdings}

df_trades
path: data/trades.csv
schema:
{schema_trades}

Feedback from last try: {feedback}

Question: {question}
"""

    answer_prompt = """
Given the solution data below. Write the answer to the question like a good chatbot.

Format the output in a human readable way. Use markdown to format the output.

Original Question: {question}
Solution:
{output}
"""
    schema_holdings = df_holdings.dtypes.to_dict()
    schema_trades = df_trades.dtypes.to_dict()

    # Bounded retry loop. The attempt counter lives in this single call, so
    # the limit is actually enforced (a counter reset on every recursive call
    # never reaches its limit).
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        query = analysis_prompt.format(
            schema_holdings=schema_holdings,
            schema_trades=schema_trades,
            question=question,
            feedback=feedback,
        )
        print("----\n", query)
        reply = _chat(query)

        # No fenced block: the model answered in prose, pass it through.
        if "```" not in reply:
            return reply

        code = _extract_code(reply)
        print("----\n", code)

        # SECURITY: this executes model-generated code with the privileges of
        # this process. It is NOT sandboxed; only run against trusted input.
        # A single namespace dict is used so that functions defined by the
        # generated code can see its own top-level variables, and so that the
        # code does not get access to this module's globals. Copies keep a
        # failed attempt from mutating the frames seen by the next one.
        namespace = {
            "pd": pd,
            "df_holdings": df_holdings.copy(),
            "df_trades": df_trades.copy(),
        }
        captured = StringIO()
        try:
            with redirect_stdout(captured):
                exec(code, namespace)
        except Exception as e:
            # Feed the failing code and error back into the next prompt.
            feedback = f"Generated Code: {code} Error: {e}"
            if attempt < max_attempts:
                print("\n\nRetrying again...")
            continue

        # Ask the model to phrase the raw printed output as a chat answer.
        return _chat(
            answer_prompt.format(output=captured.getvalue(), question=question)
        )

    return "Sorry can not find the answer"
102
+
103
+
104
def main(mesage, history=None):
    """Chat callback passed to gr.ChatInterface: answer one user message.

    Args:
        mesage: The user's question text. The parameter name is kept as
            originally spelled so existing keyword callers keep working.
        history: Prior chat turns. Accepted for the callback signature but
            not used; each question is answered without conversation context.

    Returns:
        The answer string produced by csv_qa.
    """
    return csv_qa(mesage)
106
+
107
+
108
# Static UI text and example prompts for the chat page.
_page_title = "## CSV Data Analysis"
_page_intro = (
    "Ask a question about the data in the csv files related to holding and trades."
)
_example_questions = [
    "What is the total value of the holdings?",
    "What is the total profit or loss (PL_YTD) for a Garfield in the holdings data?",
]

# Build the page: a heading, a short description, then the chat widget that
# routes every user message through main().
with gr.Blocks(fill_height=True) as app:
    gr.Markdown(_page_title)
    gr.Markdown(_page_intro)

    gr.ChatInterface(fn=main, examples=_example_questions)


# Start the Gradio server for the page built above.
app.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pandas
2
+ openai
3
+ gradio