LLM_Analyst / app.py
Vivek-tiwari's picture
Update app.py
49ea885 verified
raw
history blame
4.55 kB
import os
import shutil
import gradio as gr
from transformers import ReactCodeAgent, HfEngine
import pandas as pd
from transformers.agents import stream_to_gradio
from huggingface_hub import login
# Authenticate against the Hugging Face Hub with the token read from the
# environment (os.getenv returns None when the variable is not set).
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
login(hf_token)

# Modules the agent-generated code is allowed to import on top of the
# agent's built-in defaults.
AUTHORIZED_IMPORTS = ["numpy", "pandas", "matplotlib.pyplot", "seaborn", "scipy.stats"]

# LLM backend and the code-writing agent shared by every request.
# The agent gets no extra tools and is capped at 10 iterations per run.
llm_engine = HfEngine("mistralai/Mistral-Nemo-Instruct-2407")
agent = ReactCodeAgent(
    tools=[],
    llm_engine=llm_engine,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    max_iterations=10,
)
# Prompt template sent to the agent for every analysis run.
# `{structure_notes}` is the only placeholder; interact_with_agent() fills it
# through str.format() with the output of DataFrame.describe() and the column
# dtypes. Because str.format() is used, any literal brace added to this text
# must be doubled ("{{" / "}}").
# The prompt also tells the agent that the DataFrame is available as the
# variable `data_file` and that plots must be saved under './figures/'.
base_prompt = """<task>You are an expert data analyst.
According to the features you have and the data structure given below, determine which feature should be the target.
Then list 5 interesting questions that could be asked on this data, for instance about specific correlations with target variable.
Then answer these questions one by one, by finding the relevant numbers.
<important>Meanwhile, plot some figures using matplotlib/seaborn and save them to the (already existing) folder './figures/': take care to clear each figure with plt.clf() before doing another plot.
In your final answer: summarize these correlations and trends
After each number derive real worlds insights, for instance: "Correlation between is_december and boredness is 1.3453, which suggest people are more bored in winter".
<important>Your final answer should be a long string with at least 3 numbered, detailed parts and a statement of explaining why you chose that as an answer.
Structure of the data:
{structure_notes}
<important>The data file is passed to you as the variable data_file, it is a pandas dataframe, you can use it directly.
<important>DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
"""
def get_images_in_directory(directory):
    """Return the paths of all image files found under ``directory``.

    The directory tree is walked recursively. A file counts as an image when
    its extension (compared case-insensitively) is one of .png, .jpg, .jpeg,
    .gif, .bmp or .tiff.

    Args:
        directory: Path of the root folder to scan.

    Returns:
        A list of paths (each joined with the folder it was found in), in a
        deterministic order: within every folder, files are listed
        alphabetically and sub-folders are visited alphabetically. A missing
        or empty directory yields an empty list.
    """
    image_extensions = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff"}
    image_files = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend in a stable order instead of
        # whatever order the file system happens to report.
        dirs.sort()
        image_files.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if os.path.splitext(name)[1].lower() in image_extensions
        )
    return image_files
def interact_with_agent(file_input, prompt):
    """Run the data-analyst agent on an uploaded CSV and stream the chat.

    This is a generator used as a Gradio event handler. Every ``yield`` is the
    complete chat history to display, as a list of
    ``[user_message, bot_message]`` pairs in which ``None`` leaves that side
    of the turn empty. An image is sent as a one-element tuple ``(path,)``.

    Args:
        file_input: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string. ``None`` when nothing was
            uploaded.
        prompt: Optional free-text notes appended to the base prompt.

    Yields:
        The chat history so far. While the agent is running, a temporary
        "still processing" turn is appended to what is yielded; the last
        yield contains only the real messages.
    """
    if file_input is None:
        yield [[None, "Please upload a CSV file before running the analysis."]]
        return

    # Start every run from an empty figures folder so plots from a previous
    # analysis are not mixed with the new ones.
    shutil.rmtree("./figures", ignore_errors=True)
    os.makedirs("./figures", exist_ok=True)

    # Accept both a file-like object (path in `.name`) and a bare path string.
    csv_path = getattr(file_input, "name", file_input)
    try:
        data_file = pd.read_csv(csv_path)
    except Exception as e:
        # Any read/parse failure is reported in the chat instead of crashing
        # the handler.
        yield [[None, f"Error reading CSV file: {e}"]]
        return

    data_structure_notes = f"""- Description (output of .describe()):
{data_file.describe()}
- Columns with dtypes:
{data_file.dtypes}"""
    full_prompt = base_prompt.format(structure_notes=data_structure_notes)
    if prompt:
        full_prompt += f"\nAdditional notes: {prompt}"

    # The prompt is shown as the user's turn; everything else is a bot turn.
    messages = [[full_prompt, None]]
    yield messages + [[None, "⏳ Starting task..."]]

    seen_image_paths = set()
    for msg in stream_to_gradio(agent, full_prompt, data_file=data_file):
        content = msg.content
        if isinstance(content, str):
            messages.append([None, content])
        elif isinstance(content, dict) and "path" in content:
            # Image message: show each distinct file only once.
            image_path = content["path"]
            if image_path not in seen_image_paths:
                seen_image_paths.add(image_path)
                messages.append([None, (image_path,)])
        # The placeholder is only added to the yielded copy, never stored in
        # `messages`, so nothing has to be removed once the loop ends.
        yield messages + [[None, "⏳ Still processing..."]]

    yield messages
# UI layout: a header, the CSV upload, an optional notes box, the run button
# and the chatbot that displays what interact_with_agent() yields.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.blue,
        secondary_hue=gr.themes.colors.gray,
    )
) as demo:
    # The bold span is opened and closed explicitly; an unmatched "**" would
    # be rendered as literal asterisks.
    gr.Markdown("""# Mistral-Nemo Data analyst 📊🤔
Drop a `.csv` file below, add notes to describe this data if needed, and **Mistral-Nemo will analyze the file content and draw figures for you!**""")
    file_input = gr.File(label="Your file to analyze", file_types=[".csv"])
    text_input = gr.Textbox(
        label="Additional notes to support the analysis"
    )
    submit = gr.Button("Run analysis!", variant="primary")
    chatbot = gr.Chatbot(
        label="Data Analyst Agent",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
        ),
    )
    # interact_with_agent is a generator, so the chatbot is refreshed on
    # every value it yields.
    submit.click(interact_with_agent, [file_input, text_input], [chatbot])

if __name__ == "__main__":
    # share=True asks Gradio to create a public share link for the app.
    demo.launch(share=True)