alishabhale committed on
Commit
cd69665
·
verified ·
1 Parent(s): 01d457b

Updated for free ChatGPT

Browse files
Files changed (1) hide show
  1. app.py +28 -69
app.py CHANGED
@@ -2,101 +2,60 @@ import os
2
  import shutil
3
  import gradio as gr
4
  import pandas as pd
5
- import openai # Using OpenAI GPT-4 Turbo
6
- from gradio import Chatbot
7
- from gradio.data_classes import FileData
8
-
9
- # Set OpenAI API Key (Ensure it's in your environment variables)
10
- openai.api_key = os.getenv("OPENAI_API_KEY")
11
-
12
- if not openai.api_key:
13
- raise ValueError("OpenAI API key is missing! Set OPENAI_API_KEY in Hugging Face Secrets.")
14
 
 
 
15
 
16
  base_prompt = """You are an expert data analyst.
17
- According to the features you have and the data structure given below, determine which feature should be the target.
18
- Then list 3 interesting questions that could be asked on this data, for instance about specific correlations with the target variable.
19
- Then answer these questions one by one, by finding the relevant numbers.
20
- Meanwhile, plot some figures using matplotlib/seaborn and save them to the folder './figures/'.
21
- Take care to clear each figure with plt.clf() before doing another plot.
22
- In your final answer, summarize these correlations and trends.
23
- After each number, derive real-world insights.
24
-
25
- Structure of the data:
26
  {structure_notes}
27
-
28
- The data file is passed to you as a pandas dataframe named `data_file`. You can use it directly.
29
- DO NOT try to load `data_file`, it is already pre-loaded!
30
  """
31
 
32
  def get_images_in_directory(directory):
33
  image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
34
- image_files = []
35
- for root, _, files in os.walk(directory):
36
- for file in files:
37
- if os.path.splitext(file)[1].lower() in image_extensions:
38
- image_files.append(os.path.join(root, file))
39
- return image_files
40
 
41
  def interact_with_agent(file_input, additional_notes):
42
  shutil.rmtree("./figures", ignore_errors=True)
43
  os.makedirs("./figures", exist_ok=True)
44
 
45
  data_file = pd.read_csv(file_input)
46
- data_structure_notes = f"""- Description (output of .describe()):
47
  {data_file.describe()}
48
- - Columns with dtypes:
49
  {data_file.dtypes}"""
50
 
51
  prompt = base_prompt.format(structure_notes=data_structure_notes)
52
  if additional_notes:
53
- prompt += "\nAdditional notes on the data:\n" + additional_notes
54
-
55
- messages = [{"role": "system", "content": "You are an expert data analyst."},
56
- {"role": "user", "content": prompt}]
57
-
58
- yield [gr.ChatMessage(role="assistant", content="⏳ _Starting analysis..._")]
59
-
60
- client = openai.OpenAI()
61
 
62
- # response = openai.ChatCompletion.create(
63
- # model="gpt-4-turbo-2024-04-09", # Correct model name
64
- # messages=[{"role": "user", "content": "Hello, world!"}]
65
- # )
66
-
67
- # print(response)
68
 
69
- response = client.chat.completions.create(
70
- model="gpt-3.5-turbo",
71
- messages=[
72
- {"role": "system", "content": "You are an AI assistant."},
73
- {"role": "user", "content": "Hello!"}
74
- ]
75
- )
76
- assistant_response = response["choices"][0]["message"]["content"]
77
- messages.append({"role": "assistant", "content": assistant_response})
78
 
79
- plot_image_paths = {}
 
 
80
  for image_path in get_images_in_directory("./figures"):
81
- if image_path not in plot_image_paths:
82
- plot_image_paths[image_path] = True
83
- messages.append(gr.ChatMessage(
84
- role="assistant",
85
- content=FileData(path=image_path, mime_type="image/png")
86
- ))
87
  yield messages
88
 
89
- # Gradio UI
90
- demo = gr.Blocks(
91
- theme=gr.themes.Soft(
92
- primary_hue=gr.themes.colors.yellow,
93
- secondary_hue=gr.themes.colors.blue,
94
- )
95
- )
96
 
97
  with demo:
98
- gr.Markdown("""# GPT-4 Turbo Data Analyst 📊🤖
99
- Drop a `.csv` file below, add notes if needed, and **GPT-4 Turbo** will analyze it and generate insights with plots!""")
100
  file_input = gr.File(label="Upload CSV file")
101
  text_input = gr.Textbox(label="Additional notes")
102
  submit = gr.Button("Run Analysis!", variant="primary")
@@ -104,4 +63,4 @@ Drop a `.csv` file below, add notes if needed, and **GPT-4 Turbo** will analyze
104
  submit.click(interact_with_agent, [file_input, text_input], [chatbot])
105
 
106
  if __name__ == "__main__":
107
- demo.launch()
 
2
import os
import shutil

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from gradio.data_classes import FileData
from transformers import pipeline
 
 
 
 
 
 
8
 
9
# Initialize Hugging Face Chat Model (Open-source LLM).
# NOTE(review): this downloads/loads Mistral-7B at import time — on a CPU-only
# Space this is slow and memory-heavy; confirm the Space has enough RAM.
chatbot_pipeline = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")

# Prompt template sent to the model on each run. {structure_notes} is filled
# with the dataframe's describe()/dtypes summary in interact_with_agent.
base_prompt = """You are an expert data analyst.
Analyze the dataset structure and determine the best target variable.
List 3 interesting questions about correlations in the data.
Answer these questions with relevant numbers and real-world insights.
Generate relevant plots using Matplotlib/Seaborn and save them to './figures/'.
Ensure each figure is cleared before creating another.
Structure of the dataset:
{structure_notes}
The data is already loaded as a pandas dataframe named `data_file`.
"""
22
 
23
def get_images_in_directory(directory):
    """Return paths of image files located directly inside *directory*.

    Only the top level is scanned (no recursion); an entry counts as an
    image when its extension, lowercased, is one of the known image types.
    """
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
    found = []
    for entry in os.listdir(directory):
        suffix = os.path.splitext(entry)[1].lower()
        if suffix in image_extensions:
            found.append(os.path.join(directory, entry))
    return found
 
 
 
 
 
26
 
27
def interact_with_agent(file_input, additional_notes):
    """Run the LLM data-analysis flow on an uploaded CSV file.

    Streams Gradio chat messages: first a progress placeholder, then the
    model's analysis, followed by any figures saved under ./figures/.

    Args:
        file_input: Path of the uploaded CSV (from gr.File).
        additional_notes: Optional free-text notes appended to the prompt.

    Yields:
        list[gr.ChatMessage]: the growing message history for the Chatbot.
    """
    # Start from a clean figures directory so stale plots are never shown.
    shutil.rmtree("./figures", ignore_errors=True)
    os.makedirs("./figures", exist_ok=True)

    data_file = pd.read_csv(file_input)
    data_structure_notes = f"""- Description:
{data_file.describe()}
- Columns and types:
{data_file.dtypes}"""

    prompt = base_prompt.format(structure_notes=data_structure_notes)
    if additional_notes:
        prompt += "\nAdditional Notes:\n" + additional_notes

    yield [gr.ChatMessage(role="assistant", content="⏳ _Analyzing dataset..._")]

    # Generate the analysis with the Hugging Face LLM.
    # return_full_text=False strips the echoed prompt from the output
    # (text-generation pipelines return prompt + completion by default),
    # and max_new_tokens bounds only the completion length — max_length
    # would also count prompt tokens, which can overflow on wide datasets
    # since the prompt embeds the full describe()/dtypes dump.
    response = chatbot_pipeline(
        prompt,
        max_new_tokens=1024,
        do_sample=True,
        return_full_text=False,
    )[0]['generated_text']

    # Use gr.ChatMessage consistently; the original mixed plain dicts and
    # ChatMessage objects in one history, which the messages-format Chatbot
    # does not handle uniformly.
    messages = [
        gr.ChatMessage(role="user", content=prompt),
        gr.ChatMessage(role="assistant", content=response),
    ]

    # Attach any figures the analysis saved to ./figures/.
    # FileData is imported from gradio.data_classes (as the previous version
    # of this file did); `gr.FileData` is not a public top-level attribute
    # in all gradio versions.
    for image_path in get_images_in_directory("./figures"):
        messages.append(gr.ChatMessage(
            role="assistant",
            content=FileData(path=image_path, mime_type="image/png"),
        ))

    yield messages
53
 
54
# Gradio UI for Hugging Face Spaces
with gr.Blocks() as demo:
    gr.Markdown("# GPT Data Analyst (Hugging Face) 📊🤖")
    file_input = gr.File(label="Upload CSV file")
    text_input = gr.Textbox(label="Additional notes")
    submit = gr.Button("Run Analysis!", variant="primary")
    # NOTE(review): `chatbot` is created on a line this diff does not show
    # (unchanged context between hunks, file line 62) — presumably a
    # gr.Chatbot component; confirm against the full file.
    submit.click(interact_with_agent, [file_input, text_input], [chatbot])

if __name__ == "__main__":
    demo.launch(share=True)  # Enable public sharing on HF Spaces