"""Gradio app that asks an open-source LLM to analyse an uploaded CSV file."""

import mimetypes
import os
import shutil

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from transformers import pipeline

# Initialize Hugging Face Chat Model (Open-source LLM).
# NOTE: the pipeline is built at import time, so importing this module
# triggers the model load.
chatbot_pipeline = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")

# Directory that is wiped at the start of every run and scanned for images
# at the end of it.
FIGURES_DIR = "./figures"

base_prompt = """You are an expert data analyst.
Analyze the dataset structure and determine the best target variable.
List 3 interesting questions about correlations in the data.
Answer these questions with relevant numbers and real-world insights.
Generate relevant plots using Matplotlib/Seaborn and save them to './figures/'.
Ensure each figure is cleared before creating another.

Structure of the dataset:
{structure_notes}

The data is already loaded as a pandas dataframe named `data_file`.
"""


def get_images_in_directory(directory):
    """Return the paths of the image files directly inside *directory*.

    A file counts as an image when its extension (compared
    case-insensitively) is one of png, jpg, jpeg, gif, bmp or tiff.
    Sub-directories are not descended into.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A sorted list of ``os.path.join(directory, name)`` paths.

    Raises:
        OSError: If *directory* cannot be listed (e.g. it does not exist).
    """
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}
    # os.listdir() returns entries in arbitrary order; sort so that callers
    # get a deterministic sequence.
    return sorted(
        os.path.join(directory, file)
        for file in os.listdir(directory)
        if os.path.splitext(file)[1].lower() in image_extensions
    )


def interact_with_agent(file_input, additional_notes):
    """Run the LLM analysis for an uploaded CSV and stream chat messages.

    This is a generator used as a Gradio event handler: every yielded list
    replaces the content of the chatbot component.

    Args:
        file_input: Value of the ``gr.File`` component. It is passed straight
            to ``pandas.read_csv``; ``None`` means nothing was uploaded.
        additional_notes: Free-text notes appended to the prompt when
            non-empty.

    Yields:
        Lists of ``gr.ChatMessage`` objects: first a progress placeholder,
        then the prompt, the model's answer and one message per image found
        in the figures directory. When the input is missing or unreadable a
        single warning message is yielded instead.
    """
    # Start every run from an empty figures directory so images from a
    # previous run are never shown again.
    shutil.rmtree(FIGURES_DIR, ignore_errors=True)
    os.makedirs(FIGURES_DIR, exist_ok=True)

    if file_input is None:
        yield [gr.ChatMessage(role="assistant", content="⚠️ Please upload a CSV file first.")]
        return

    try:
        data_file = pd.read_csv(file_input)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError, OSError) as err:
        yield [gr.ChatMessage(role="assistant", content=f"⚠️ Could not read the CSV file: {err}")]
        return

    data_structure_notes = f"""- Description:
{data_file.describe()}
- Columns and types:
{data_file.dtypes}"""

    prompt = base_prompt.format(structure_notes=data_structure_notes)
    # The notes are appended after .format() so that braces typed by the
    # user are never interpreted as format fields.
    if additional_notes:
        prompt += "\nAdditional Notes:\n" + additional_notes

    yield [gr.ChatMessage(role="assistant", content="⏳ _Analyzing dataset..._")]

    # Generate response using Hugging Face LLM.
    # - max_new_tokens bounds only the generated part; max_length would also
    #   count the prompt tokens, which can be numerous here.
    # - return_full_text=False keeps the prompt out of the answer; the prompt
    #   is already displayed as the user message below.
    response = chatbot_pipeline(
        prompt,
        max_new_tokens=1024,
        do_sample=True,
        return_full_text=False,
    )[0]["generated_text"]

    messages = [
        gr.ChatMessage(role="user", content=prompt),
        gr.ChatMessage(role="assistant", content=response),
    ]

    # Placeholder for visualization (if required): nothing in this function
    # executes code produced by the model, so images only show up if
    # something else has written them to the figures directory.
    for image_path in get_images_in_directory(FIGURES_DIR):
        guessed_type, _ = mimetypes.guess_type(image_path)
        messages.append(
            gr.ChatMessage(
                role="assistant",
                content=gr.FileData(path=image_path, mime_type=guessed_type or "image/png"),
            )
        )

    yield messages


# Gradio UI for Hugging Face Spaces
demo = gr.Blocks()
with demo:
    gr.Markdown("# GPT Data Analyst (Hugging Face) 📊🤖")
    file_input = gr.File(label="Upload CSV file")
    text_input = gr.Textbox(label="Additional notes")
    submit = gr.Button("Run Analysis!", variant="primary")
    chatbot = gr.Chatbot(label="Data Analyst Assistant", type="messages")
    submit.click(interact_with_agent, [file_input, text_input], [chatbot])

if __name__ == "__main__":
    demo.launch(share=True)  # Enable public sharing on HF Spaces