File size: 2,641 Bytes
241748e
 
 
 
cd69665
 
 
57cfd0f
cd69665
 
57cfd0f
241748e
cd69665
 
 
 
 
 
241748e
cd69665
241748e
 
 
 
cd69665
241748e
 
 
 
 
 
cd69665
241748e
cd69665
241748e
 
 
 
cd69665
5a58b24
cd69665
57cfd0f
cd69665
 
241748e
cd69665
 
 
241748e
cd69665
 
241748e
 
cd69665
 
241748e
 
cd69665
241748e
 
 
 
 
 
 
cd69665
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import shutil
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import pipeline

# Initialize Hugging Face Chat Model (Open-source LLM)
# NOTE(review): this runs at import time and downloads/loads a 7B-parameter
# model — expect a long cold start and high memory use; confirm the hosting
# environment (e.g. HF Spaces hardware) can actually hold Mistral-7B.
chatbot_pipeline = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")

# Prompt template sent to the LLM. `{structure_notes}` is filled in by
# interact_with_agent() with the dataset's describe() output and dtypes.
# The instruction to save figures to './figures/' pairs with the figure
# collection step after generation.
base_prompt = """You are an expert data analyst.
Analyze the dataset structure and determine the best target variable.
List 3 interesting questions about correlations in the data.
Answer these questions with relevant numbers and real-world insights.
Generate relevant plots using Matplotlib/Seaborn and save them to './figures/'.
Ensure each figure is cleared before creating another.
Structure of the dataset:
{structure_notes}
The data is already loaded as a pandas dataframe named `data_file`.
"""

def get_images_in_directory(directory):
    """Return full paths of image files found directly inside *directory*.

    A file counts as an image when its extension (case-insensitive) is one
    of the common raster formats below. Subdirectories are not searched.
    """
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
    paths = []
    for entry in os.listdir(directory):
        _, ext = os.path.splitext(entry)
        if ext.lower() in image_extensions:
            paths.append(os.path.join(directory, entry))
    return paths

def interact_with_agent(file_input, additional_notes):
    """Analyze an uploaded CSV with the LLM and stream chat messages.

    Parameters
    ----------
    file_input : path-like handed over by gr.File; read with pd.read_csv.
    additional_notes : str; optional free text appended to the prompt.

    Yields
    ------
    list[gr.ChatMessage]
        First an interim "analyzing" status, then the full conversation
        (prompt, model answer, and any figures saved under ./figures/),
        consumable by a gr.Chatbot(type="messages").
    """
    # Start from a clean figures directory so stale plots from a previous
    # run are never attached to this answer.
    shutil.rmtree("./figures", ignore_errors=True)
    os.makedirs("./figures", exist_ok=True)

    data_file = pd.read_csv(file_input)
    data_structure_notes = f"""- Description:
    {data_file.describe()}
    - Columns and types:
    {data_file.dtypes}"""

    prompt = base_prompt.format(structure_notes=data_structure_notes)
    if additional_notes:
        prompt += "\nAdditional Notes:\n" + additional_notes

    # Interim status while generation runs (this function is a generator,
    # so Gradio shows this immediately).
    yield [gr.ChatMessage(role="assistant", content="⏳ _Analyzing dataset..._")]

    # BUG FIX: `max_length` bounds prompt + completion tokens, and the
    # prompt here (describe() output + dtypes) can easily exceed 1024,
    # leaving no room for an answer. `max_new_tokens` bounds only the
    # generated continuation, and `return_full_text=False` keeps the
    # echoed prompt out of the returned text.
    response = chatbot_pipeline(
        prompt, max_new_tokens=1024, do_sample=True, return_full_text=False
    )[0]['generated_text']

    # BUG FIX: the original mixed plain dicts and gr.ChatMessage objects
    # in one history; use gr.ChatMessage uniformly for a
    # Chatbot(type="messages") component.
    messages = [
        gr.ChatMessage(role="user", content=prompt),
        gr.ChatMessage(role="assistant", content=response),
    ]

    # Attach every figure the model's analysis saved to ./figures/.
    # NOTE(review): mime_type is hard-coded to PNG even for .jpg/.gif
    # files collected by get_images_in_directory — confirm Gradio
    # tolerates the mismatch or derive it from the extension.
    for image_path in get_images_in_directory("./figures"):
        messages.append(gr.ChatMessage(role="assistant", content=gr.FileData(path=image_path, mime_type="image/png")))

    yield messages

# Gradio UI for Hugging Face Spaces
demo = gr.Blocks()

# Declarative layout: file upload + free-text notes feed the analysis
# function; results stream into a messages-style chatbot.
with demo:
    gr.Markdown("# GPT Data Analyst (Hugging Face) 📊🤖")
    file_input = gr.File(label="Upload CSV file")
    text_input = gr.Textbox(label="Additional notes")
    submit = gr.Button("Run Analysis!", variant="primary")
    # type="messages" expects ChatMessage/dict message objects, which is
    # what interact_with_agent yields.
    chatbot = gr.Chatbot(label="Data Analyst Assistant", type="messages")
    # interact_with_agent is a generator, so the chatbot updates on each yield.
    submit.click(interact_with_agent, [file_input, text_input], [chatbot])

if __name__ == "__main__":
    demo.launch(share=True)  # Enable public sharing on HF Spaces