alishabhale committed on
Commit
241748e
·
1 Parent(s): 93b7f65

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +93 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import gradio as gr
4
+ import pandas as pd
5
+ import openai # Using OpenAI GPT-4 Turbo
6
+ from gradio import Chatbot
7
+ from gradio.data_classes import FileData
8
+
9
# Set OpenAI API Key (Ensure it's in your environment variables)
# os.getenv returns None when OPENAI_API_KEY is unset; nothing here validates
# the value.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Prompt template for the analysis request. `{structure_notes}` is the only
# placeholder; it is filled with str.format() before the prompt is sent.
# NOTE(review): the prompt tells the model that `data_file` is pre-loaded and
# asks it to save plots to './figures/', but the visible code only sends this
# text to the chat endpoint and does not execute model-written code. This
# looks like wording left over from an agent-based version; confirm.
base_prompt = """You are an expert data analyst.
According to the features you have and the data structure given below, determine which feature should be the target.
Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with the target variable.
Then answer these questions one by one, by finding the relevant numbers.
Meanwhile, plot some figures using matplotlib/seaborn and save them to the folder './figures/'.
Take care to clear each figure with plt.clf() before doing another plot.
In your final answer, summarize these correlations and trends.
After each number, derive real-world insights.

Structure of the data:
{structure_notes}

The data file is passed to you as a pandas dataframe named `data_file`. You can use it directly.
DO NOT try to load `data_file`, it is already pre-loaded!
"""
27
+
28
def get_images_in_directory(directory):
    """Return the paths of every image file found under *directory*.

    The directory is walked recursively and a file counts as an image when
    its extension (compared case-insensitively) is one of the known image
    suffixes below.

    Args:
        directory: Root folder to search. A missing folder yields ``[]``
            because ``os.walk`` silently produces nothing for it.

    Returns:
        A sorted list of paths (each prefixed with *directory*). Sorting
        keeps the order stable across runs and filesystems, since
        ``os.walk`` itself gives no ordering guarantee.
    """
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
    return sorted(
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names
        if os.path.splitext(name)[1].lower() in image_extensions
    )
36
+
37
def _request_completion(messages):
    """Send *messages* to GPT-4 Turbo and return the assistant's reply text.

    ``openai.ChatCompletion`` was removed in openai 1.0, so the 1.x
    module-level client (``openai.chat.completions``) is used whenever it is
    available; the legacy call is kept only as a fallback for pre-1.0
    installs, because the dependency is not pinned.
    """
    request = {"model": "gpt-4-turbo", "messages": messages, "max_tokens": 1000}
    if hasattr(openai, "chat"):
        response = openai.chat.completions.create(**request)
        return response.choices[0].message.content or ""
    response = openai.ChatCompletion.create(**request)
    return response["choices"][0]["message"]["content"] or ""


def interact_with_agent(file_input, additional_notes):
    """Analyse an uploaded CSV with GPT-4 Turbo and stream chat updates.

    This is a generator used as a Gradio event handler: every yielded value
    is the full list of messages to show in the ``type="messages"`` chatbot.

    Args:
        file_input: Path (or file-like object) of the uploaded CSV, or
            ``None`` when the user has not uploaded anything.
        additional_notes: Optional free-text notes appended to the prompt.

    Yields:
        A list of chat messages (dicts with ``role``/``content`` or
        ``gr.ChatMessage`` objects): first a progress placeholder, then the
        full conversation followed by any images found in ``./figures``.
        When the file is missing or unreadable, a single explanatory
        assistant message is yielded instead and the generator stops.
    """
    # Local import keeps this block self-contained (stdlib only).
    import mimetypes

    # Guard: clicking "Run" without a file would otherwise crash in read_csv.
    if not file_input:
        yield [{"role": "assistant", "content": "⚠️ Please upload a CSV file first."}]
        return

    # Parse before touching ./figures so a bad upload leaves no side effects.
    try:
        data_file = pd.read_csv(file_input)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        yield [{"role": "assistant", "content": f"❌ Could not read the CSV file: {err}"}]
        return

    # Start every run from an empty figures folder.
    shutil.rmtree("./figures", ignore_errors=True)
    os.makedirs("./figures", exist_ok=True)

    data_structure_notes = f"""- Description (output of .describe()):
{data_file.describe()}
- Columns with dtypes:
{data_file.dtypes}"""

    prompt = base_prompt.format(structure_notes=data_structure_notes)
    if additional_notes:
        prompt += "\nAdditional notes on the data:\n" + additional_notes

    api_messages = [
        {"role": "system", "content": "You are an expert data analyst."},
        {"role": "user", "content": prompt},
    ]

    yield [gr.ChatMessage(role="assistant", content="⏳ _Starting analysis..._")]

    assistant_response = _request_completion(api_messages)

    # The chat display is kept separate from the API payload so UI-only
    # objects (image messages) never end up in the list sent to OpenAI.
    chat_history = [
        *api_messages,
        {"role": "assistant", "content": assistant_response},
    ]

    for image_path in get_images_in_directory("./figures"):
        # Derive the MIME type from the extension instead of assuming PNG;
        # fall back to PNG when the type cannot be guessed.
        mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
        chat_history.append(gr.ChatMessage(
            role="assistant",
            content=FileData(path=image_path, mime_type=mime_type),
        ))

    yield chat_history
74
+
75
# Gradio UI: a single page with a CSV upload, an optional notes box, a run
# button and a chatbot panel that receives whatever interact_with_agent yields.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.yellow,
        secondary_hue=gr.themes.colors.blue,
    )
) as demo:
    gr.Markdown("""# GPT-4 Turbo Data Analyst 📊🤖
Drop a `.csv` file below, add notes if needed, and **GPT-4 Turbo** will analyze it and generate insights with plots!""")

    # Inputs
    file_input = gr.File(label="Upload CSV file")
    text_input = gr.Textbox(label="Additional notes")
    submit = gr.Button("Run Analysis!", variant="primary")

    # Output: message-dict based chat history
    chatbot = gr.Chatbot(label="Data Analyst Assistant", type="messages")

    # Each value yielded by the handler replaces the chatbot content.
    submit.click(
        fn=interact_with_agent,
        inputs=[file_input, text_input],
        outputs=[chatbot],
    )

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
gradio
pandas
numpy
matplotlib
seaborn
scipy
smolagents
huggingface_hub
# app.py does `import openai`; without this entry the app fails at import time.
openai