Spaces:
Runtime error
Runtime error
Commit
·
241748e
1
Parent(s):
93b7f65
Initial commit
Browse files- app.py +93 -0
- requirements.txt +8 -0
app.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import openai # Using OpenAI GPT-4 Turbo
|
| 6 |
+
from gradio import Chatbot
|
| 7 |
+
from gradio.data_classes import FileData
|
| 8 |
+
|
| 9 |
+
# Set OpenAI API Key (Ensure it's in your environment variables)
|
| 10 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 11 |
+
|
| 12 |
+
base_prompt = """You are an expert data analyst.
|
| 13 |
+
According to the features you have and the data structure given below, determine which feature should be the target.
|
| 14 |
+
Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with the target variable.
|
| 15 |
+
Then answer these questions one by one, by finding the relevant numbers.
|
| 16 |
+
Meanwhile, plot some figures using matplotlib/seaborn and save them to the folder './figures/'.
|
| 17 |
+
Take care to clear each figure with plt.clf() before doing another plot.
|
| 18 |
+
In your final answer, summarize these correlations and trends.
|
| 19 |
+
After each number, derive real-world insights.
|
| 20 |
+
|
| 21 |
+
Structure of the data:
|
| 22 |
+
{structure_notes}
|
| 23 |
+
|
| 24 |
+
The data file is passed to you as a pandas dataframe named `data_file`. You can use it directly.
|
| 25 |
+
DO NOT try to load `data_file`, it is already pre-loaded!
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
def get_images_in_directory(directory, image_extensions=None):
    """Recursively collect the paths of image files found under ``directory``.

    Args:
        directory: Root folder to walk; every nested sub-folder is visited.
        image_extensions: Optional iterable of file suffixes (including the
            leading dot) that count as images.  When omitted, the default set
            ``.png, .jpg, .jpeg, .gif, .bmp, .tiff`` is used.

    Returns:
        A sorted list of paths, each built by joining the file name onto the
        folder it was found in.  Suffixes are compared case-insensitively.
        The list is empty when nothing matches; ``os.walk`` also yields
        nothing for a folder that does not exist, so that case returns ``[]``
        as well.
    """
    if image_extensions is None:
        image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
    # Normalise once so the per-file check is a plain set lookup.
    allowed = {extension.lower() for extension in image_extensions}

    image_files = [
        os.path.join(root, file)
        for root, _, files in os.walk(directory)
        for file in files
        if os.path.splitext(file)[1].lower() in allowed
    ]
    # os.walk makes no ordering promise; sort so callers get a stable order.
    image_files.sort()
    return image_files
|
| 36 |
+
|
| 37 |
+
def interact_with_agent(file_input, additional_notes):
    """Ask GPT-4 Turbo to analyse an uploaded CSV and stream chat updates.

    This is a generator: it first yields a one-message "starting" placeholder,
    then yields the full conversation (system prompt, user prompt, the model's
    answer and one attachment message per image found in ``./figures``).

    Args:
        file_input: Path (or file-like object) accepted by ``pandas.read_csv``.
            When empty/``None`` a single warning message is yielded instead.
        additional_notes: Optional free text appended to the prompt.

    Yields:
        Lists of chat messages for the chatbot output.

    Raises:
        Whatever ``pandas.read_csv`` or the OpenAI client raise; those errors
        are not caught here.
    """
    import mimetypes  # local import: only used to label figure attachments

    # Without an upload pd.read_csv(None) would fail with an opaque error.
    if not file_input:
        yield [gr.ChatMessage(
            role="assistant",
            content="⚠️ Please upload a CSV file before running the analysis.",
        )]
        return

    # Start each run from an empty figures folder so stale plots never leak in.
    shutil.rmtree("./figures", ignore_errors=True)
    os.makedirs("./figures", exist_ok=True)

    data_file = pd.read_csv(file_input)
    data_structure_notes = (
        "- Description (output of .describe()):\n"
        f"{data_file.describe()}\n"
        "- Columns with dtypes:\n"
        f"{data_file.dtypes}"
    )

    prompt = base_prompt.format(structure_notes=data_structure_notes)
    if additional_notes:
        prompt += "\nAdditional notes on the data:\n" + additional_notes

    messages = [
        {"role": "system", "content": "You are an expert data analyst."},
        {"role": "user", "content": prompt},
    ]

    yield [gr.ChatMessage(role="assistant", content="⏳ _Starting analysis..._")]

    request = {"model": "gpt-4-turbo", "messages": messages, "max_tokens": 1000}
    if hasattr(openai, "OpenAI"):
        # openai>=1.0 removed openai.ChatCompletion; use the client object.
        client = openai.OpenAI(api_key=openai.api_key)
        response = client.chat.completions.create(**request)
        assistant_response = response.choices[0].message.content
    else:
        # Legacy (<1.0) SDK: module-level call returning a dict-like object.
        response = openai.ChatCompletion.create(**request)
        assistant_response = response["choices"][0]["message"]["content"]
    messages.append({"role": "assistant", "content": assistant_response})

    # NOTE(review): nothing in this function executes model-generated code, so
    # ./figures stays empty unless something else writes plots there.
    for image_path in get_images_in_directory("./figures"):
        # Label each attachment with its real type; fall back to PNG.
        mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
        messages.append(gr.ChatMessage(
            role="assistant",
            content=FileData(path=image_path, mime_type=mime_type),
        ))

    # Yield once after the loop so the answer is shown even with no figures.
    yield messages
|
| 74 |
+
|
| 75 |
+
# Gradio UI
|
| 76 |
+
# Soft theme with a yellow primary hue and a blue secondary hue.
app_theme = gr.themes.Soft(
    primary_hue=gr.themes.colors.yellow,
    secondary_hue=gr.themes.colors.blue,
)

# Page layout: header text, CSV upload, optional notes, run button and the
# chatbot that displays whatever interact_with_agent yields.
with gr.Blocks(theme=app_theme) as demo:
    gr.Markdown("""# GPT-4 Turbo Data Analyst 📊🤖
Drop a `.csv` file below, add notes if needed, and **GPT-4 Turbo** will analyze it and generate insights with plots!""")
    file_input = gr.File(label="Upload CSV file")
    text_input = gr.Textbox(label="Additional notes")
    submit = gr.Button("Run Analysis!", variant="primary")
    chatbot = gr.Chatbot(label="Data Analyst Assistant", type="messages")
    # Clicking the button feeds the file and notes to the analysis generator.
    submit.click(
        fn=interact_with_agent,
        inputs=[file_input, text_input],
        outputs=[chatbot],
    )

# Launch the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
matplotlib
|
| 5 |
+
seaborn
|
| 6 |
+
scipy
|
| 7 |
+
smolagents
|
| 8 |
+
huggingface_hub
|