alishabhale committed on
Commit
cd69665
·
verified ·
1 Parent(s): 01d457b

Updated for free ChatGPT

Browse files
Files changed (1) hide show
  1. app.py +28 -69
app.py CHANGED
@@ -2,101 +2,60 @@ import os
2
  import shutil
3
  import gradio as gr
4
  import pandas as pd
5
- import openai # Using OpenAI GPT-4 Turbo
6
- from gradio import Chatbot
7
- from gradio.data_classes import FileData
8
-
9
- # Set OpenAI API Key (Ensure it's in your environment variables)
10
- openai.api_key = os.getenv("OPENAI_API_KEY")
11
-
12
- if not openai.api_key:
13
- raise ValueError("OpenAI API key is missing! Set OPENAI_API_KEY in Hugging Face Secrets.")
14
 
 
 
15
 
16
  base_prompt = """You are an expert data analyst.
17
- According to the features you have and the data structure given below, determine which feature should be the target.
18
- Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with the target variable.
19
- Then answer these questions one by one, by finding the relevant numbers.
20
- Meanwhile, plot some figures using matplotlib/seaborn and save them to the folder './figures/'.
21
- Take care to clear each figure with plt.clf() before doing another plot.
22
- In your final answer, summarize these correlations and trends.
23
- After each number, derive real-world insights.
24
-
25
- Structure of the data:
26
  {structure_notes}
27
-
28
- The data file is passed to you as a pandas dataframe named `data_file`. You can use it directly.
29
- DO NOT try to load `data_file`, it is already pre-loaded!
30
  """
31
 
32
  def get_images_in_directory(directory):
33
  image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
34
- image_files = []
35
- for root, _, files in os.walk(directory):
36
- for file in files:
37
- if os.path.splitext(file)[1].lower() in image_extensions:
38
- image_files.append(os.path.join(root, file))
39
- return image_files
40
 
41
  def interact_with_agent(file_input, additional_notes):
42
  shutil.rmtree("./figures", ignore_errors=True)
43
  os.makedirs("./figures", exist_ok=True)
44
 
45
  data_file = pd.read_csv(file_input)
46
- data_structure_notes = f"""- Description (output of .describe()):
47
  {data_file.describe()}
48
- - Columns with dtypes:
49
  {data_file.dtypes}"""
50
 
51
  prompt = base_prompt.format(structure_notes=data_structure_notes)
52
  if additional_notes:
53
- prompt += "\nAdditional notes on the data:\n" + additional_notes
54
-
55
- messages = [{"role": "system", "content": "You are an expert data analyst."},
56
- {"role": "user", "content": prompt}]
57
-
58
- yield [gr.ChatMessage(role="assistant", content="⏳ _Starting analysis..._")]
59
-
60
- client = openai.OpenAI()
61
 
62
- # response = openai.ChatCompletion.create(
63
- # model="gpt-4-turbo-2024-04-09", # Correct model name
64
- # messages=[{"role": "user", "content": "Hello, world!"}]
65
- # )
66
-
67
- # print(response)
68
 
69
- response = client.chat.completions.create(
70
- model="gpt-3.5-turbo",
71
- messages=[
72
- {"role": "system", "content": "You are an AI assistant."},
73
- {"role": "user", "content": "Hello!"}
74
- ]
75
- )
76
- assistant_response = response["choices"][0]["message"]["content"]
77
- messages.append({"role": "assistant", "content": assistant_response})
78
 
79
- plot_image_paths = {}
 
 
80
  for image_path in get_images_in_directory("./figures"):
81
- if image_path not in plot_image_paths:
82
- plot_image_paths[image_path] = True
83
- messages.append(gr.ChatMessage(
84
- role="assistant",
85
- content=FileData(path=image_path, mime_type="image/png")
86
- ))
87
  yield messages
88
 
89
- # Gradio UI
90
- demo = gr.Blocks(
91
- theme=gr.themes.Soft(
92
- primary_hue=gr.themes.colors.yellow,
93
- secondary_hue=gr.themes.colors.blue,
94
- )
95
- )
96
 
97
  with demo:
98
- gr.Markdown("""# GPT-4 Turbo Data Analyst 📊🤖
99
- Drop a `.csv` file below, add notes if needed, and **GPT-4 Turbo** will analyze it and generate insights with plots!""")
100
  file_input = gr.File(label="Upload CSV file")
101
  text_input = gr.Textbox(label="Additional notes")
102
  submit = gr.Button("Run Analysis!", variant="primary")
@@ -104,4 +63,4 @@ Drop a `.csv` file below, add notes if needed, and **GPT-4 Turbo** will analyze
104
  submit.click(interact_with_agent, [file_input, text_input], [chatbot])
105
 
106
  if __name__ == "__main__":
107
- demo.launch()
 
2
import os
import shutil

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from gradio.data_classes import FileData
from transformers import pipeline
 
 
 
 
 
 
8
 
9
# Initialize Hugging Face Chat Model (Open-source LLM).
# NOTE(review): this downloads/loads Mistral-7B at import time — on a CPU-only
# Space this is slow and memory-heavy; confirm the Space has enough RAM.
chatbot_pipeline = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")

# Prompt template sent to the model on each run. {structure_notes} is filled
# with the dataframe's describe()/dtypes summary in interact_with_agent.
base_prompt = """You are an expert data analyst.
Analyze the dataset structure and determine the best target variable.
List 3 interesting questions about correlations in the data.
Answer these questions with relevant numbers and real-world insights.
Generate relevant plots using Matplotlib/Seaborn and save them to './figures/'.
Ensure each figure is cleared before creating another.
Structure of the dataset:
{structure_notes}
The data is already loaded as a pandas dataframe named `data_file`.
"""
22
 
23
def get_images_in_directory(directory):
    """Return paths of image files located directly inside *directory*.

    Only the top level is scanned (no recursion); an entry counts as an
    image when its extension, lowercased, is one of the known image types.
    """
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
    found = []
    for entry in os.listdir(directory):
        suffix = os.path.splitext(entry)[1].lower()
        if suffix in image_extensions:
            found.append(os.path.join(directory, entry))
    return found
 
 
 
 
 
26
 
27
def interact_with_agent(file_input, additional_notes):
    """Run the LLM data-analysis flow on an uploaded CSV file.

    Streams Gradio chat messages: first a progress placeholder, then the
    model's analysis, followed by any figures saved under ./figures/.

    Args:
        file_input: Path of the uploaded CSV (from gr.File).
        additional_notes: Optional free-text notes appended to the prompt.

    Yields:
        list[gr.ChatMessage]: the growing message history for the Chatbot.
    """
    # Start from a clean figures directory so stale plots are never shown.
    shutil.rmtree("./figures", ignore_errors=True)
    os.makedirs("./figures", exist_ok=True)

    data_file = pd.read_csv(file_input)
    data_structure_notes = f"""- Description:
{data_file.describe()}
- Columns and types:
{data_file.dtypes}"""

    prompt = base_prompt.format(structure_notes=data_structure_notes)
    if additional_notes:
        prompt += "\nAdditional Notes:\n" + additional_notes

    yield [gr.ChatMessage(role="assistant", content="⏳ _Analyzing dataset..._")]

    # Generate the analysis with the Hugging Face LLM.
    # return_full_text=False strips the echoed prompt from the output
    # (text-generation pipelines return prompt + completion by default),
    # and max_new_tokens bounds only the completion length — max_length
    # would also count prompt tokens, which can overflow on wide datasets
    # since the prompt embeds the full describe()/dtypes dump.
    response = chatbot_pipeline(
        prompt,
        max_new_tokens=1024,
        do_sample=True,
        return_full_text=False,
    )[0]['generated_text']

    # Use gr.ChatMessage consistently; the original mixed plain dicts and
    # ChatMessage objects in one history, which the messages-format Chatbot
    # does not handle uniformly.
    messages = [
        gr.ChatMessage(role="user", content=prompt),
        gr.ChatMessage(role="assistant", content=response),
    ]

    # Attach any figures the analysis saved to ./figures/.
    # FileData is imported from gradio.data_classes (as the previous version
    # of this file did); `gr.FileData` is not a public top-level attribute
    # in all gradio versions.
    for image_path in get_images_in_directory("./figures"):
        messages.append(gr.ChatMessage(
            role="assistant",
            content=FileData(path=image_path, mime_type="image/png"),
        ))

    yield messages
53
 
54
# Gradio UI for Hugging Face Spaces
with gr.Blocks() as demo:
    gr.Markdown("# GPT Data Analyst (Hugging Face) 📊🤖")
    file_input = gr.File(label="Upload CSV file")
    text_input = gr.Textbox(label="Additional notes")
    submit = gr.Button("Run Analysis!", variant="primary")
    # NOTE(review): `chatbot` is created on a line this diff does not show
    # (unchanged context between hunks, file line 62) — presumably a
    # gr.Chatbot component; confirm against the full file.
    submit.click(interact_with_agent, [file_input, text_input], [chatbot])

if __name__ == "__main__":
    demo.launch(share=True)  # Enable public sharing on HF Spaces