File size: 3,768 Bytes
ac20a09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6934219
 
ac20a09
 
 
 
5c9f070
 
ac20a09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0ad9ff
ac20a09
 
 
 
 
 
 
7c14273
ac20a09
 
 
fe5a568
 
ac20a09
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import openai
import pandas as pd
import dotenv
from io import StringIO
from contextlib import redirect_stdout
import gradio as gr


dotenv.load_dotenv()


# Total number of generate-and-run attempts before giving up on a question.
_MAX_ATTEMPTS = 3


def _ask_model(prompt):
    """Send *prompt* as a single system message and return the reply text."""
    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": prompt}],
        n=1,
        temperature=0.1,
    )
    return completion.choices[0].message.content


def _extract_code(reply):
    """Return the body of the first ``` fenced block found in *reply*.

    The caller must have checked that *reply* contains a fence.
    """
    snippet = reply.split("```")[1]
    # Strip only a leading language tag. A blanket replace of "python\n"
    # would also delete that text if it occurred inside the code itself.
    tag, newline, rest = snippet.partition("\n")
    if newline and tag.strip().lower() in ("python", "python3", "py"):
        return rest
    return snippet


def csv_qa(question, feedback="None"):
    """Answer *question* about the holdings/trades CSVs using model-written code.

    The model is asked to write pandas code for the question; that code is
    executed with its stdout captured, and the captured output is sent back
    to the model to be phrased as a chat answer. When execution (or the
    phrasing call) raises, the failing code and error are fed back to the
    model and the whole cycle is retried, up to ``_MAX_ATTEMPTS`` attempts.

    Args:
        question: The user's natural-language question.
        feedback: Text inserted into the prompt as "Feedback from last try".
            Defaults to the string "None" for a first attempt.

    Returns:
        The model's formatted answer; the raw model reply when it contains
        no fenced code block; or "Sorry can not find the answer" once all
        attempts have failed.

    SECURITY: this function exec()s code written by the model in response to
    user-supplied text. The prompt asks the model to stay within data
    analysis, but that is not enforcement -- run this only in a sandboxed
    environment.
    """
    df_holdings = pd.read_csv("data/holdings.csv")

    df_trades = pd.read_csv("data/trades.csv")
    analysis_prompt = """
    You are expert data analyse who can use python and pandas. You will be asked questions about the data in the following csv files with the given schema. You have to generate a python code that uses pandas to analyze the data and answer the questions.

    Always use print statement to output the answer. Be descriptive in your answers.

    If the answer cannot be determined from the data, you should return "Sorry can not find the answer". 
    
    For security reasons do not generate code other than required for data analysis on the given data. Do not use any system commands like reading env vars or access the internet while generating the code.

    df_holdings 
    path: data/holdings.csv
    schema:
    {schema_holdings}

    df_trades
    path: data/trades.csv
    schema:
    {schema_trades}

    Feedback from last try: {feedback}

    Question: {question}
    """

    answer_prompt = """
    Given the solution data below. Write the answer to the question like a good chatbot. 
    
    Format the output in a human readable way. Use markdown to format the output. Use currency symbols ($) and percentages where applicable.
    Reply as a friendly chatbot.

    Original Question: {question}
    Solution:
    {output}

    Answer:
    """
    schema_holdings = df_holdings.dtypes.to_dict()
    schema_trades = df_trades.dtypes.to_dict()

    # A bounded loop replaces the earlier self-recursion, whose attempt
    # counter was reset on every call and therefore never stopped retrying.
    for _ in range(_MAX_ATTEMPTS):
        query = analysis_prompt.format(
            schema_holdings=schema_holdings,
            schema_trades=schema_trades,
            question=question,
            feedback=feedback,
        )
        print("----\n", query)
        response = _ask_model(query)

        # No fenced code means the model answered (or declined) directly.
        if "```" not in response:
            return response

        code = _extract_code(response)
        print("----\n", code)

        try:
            # A single namespace dict serves as both globals and locals, so
            # helper functions and comprehensions inside the generated code
            # can see the names that code defines. Copies keep one attempt's
            # in-place mutations from leaking into the next attempt.
            namespace = {
                "pd": pd,
                "df_holdings": df_holdings.copy(),
                "df_trades": df_trades.copy(),
            }
            captured = StringIO()
            with redirect_stdout(captured):
                exec(code, namespace)  # SECURITY: executes model-generated code
            output = captured.getvalue()
            # Have the model phrase the raw printed output as a chat answer.
            return _ask_model(
                answer_prompt.format(output=output, question=question)
            )
        except Exception as e:
            print("\n\nRetrying again...")
            # Tell the model what went wrong so the next attempt can fix it.
            feedback = f"Generated Code: {code} Error: {e}"

    return "Sorry can not find the answer"


def main(mesage, history=None):
    """Chat callback: answer the user's message via ``csv_qa``.

    Args:
        mesage: The text the user typed (parameter name kept as-is so
            existing keyword callers keep working).
        history: Prior conversation passed in by the chat UI. It is accepted
            for signature compatibility and not used; the default is ``None``
            rather than a shared mutable list.

    Returns:
        Whatever ``csv_qa`` returns for the message.
    """
    return csv_qa(mesage)


# Build the web UI: a heading, a short description, and a chat widget whose
# messages are handled by `main`.
with gr.Blocks(fill_height=True) as app:
    gr.Markdown("## CSV Data Analysis")
    # The file name shown here matches the file actually read from disk
    # (data/holdings.csv).
    gr.Markdown(
        "Ask a question about the data `holdings.csv` and `trades.csv`  in the csv files related to holding and trades."
    )

    gr.ChatInterface(
        fn=main,
        # Sample questions offered to the user as starting points.
        examples=[
            "What is the total value of the holdings?",
            "What is the total profit or loss (PL_YTD) for a Garfield in the holdings data?",
            "What type of question can I ask?",
        ],
    )

    gr.Markdown("__Created by Paras__")


# Start the server as soon as the module is executed.
app.launch()