import io
import os
import csv
import base64
from io import BytesIO, StringIO
from collections import defaultdict

import chainlit as cl
import pandas as pd
import matplotlib.pyplot as plt

from pandasai import SmartDataframe
from pandasai.llm import OpenAI

from langchain.agents import AgentExecutor, AgentType, initialize_agent
from langchain.agents.structured_chat.prompt import SUFFIX
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.tools import StructuredTool, Tool

from chainlit.action import Action
from chainlit.input_widget import Select, Switch, Slider

got_csv = False

@cl.on_chat_start
async def start():
    """
    This is called when the Chainlit chat is started!
    We can add some settings to our application to allow users to select the appropriate model, and more!
    """
    settings = await cl.ChatSettings(
        [
            Select(
                id="Model",
                label="OpenAI - Model",
                values=["gpt-3.5-turbo", "gpt-4-1106-preview"],
                initial_index=1,
            ),
            Switch(id="Streaming", label="OpenAI - Stream Tokens", initial=True),
            Slider(
                id="Temperature",
                label="OpenAI - Temperature",
                initial=0,
                min=0,
                max=2,
                step=0.1,
            ),
        ]
    ).send()

    await setup_agent(settings)

@cl.on_settings_update
async def setup_agent(settings):
    print("Setup agent with the following settings: ", settings)

    # We set up our agent with the user-selected (or default) settings here.
    llm = ChatOpenAI(
        temperature=settings["Temperature"],
        streaming=settings["Streaming"],
        model=settings["Model"],
    )

    # We get our memory here, which is used to track the conversation history.
    memory = get_memory()

    # This suffix is used to provide the chat history to the prompt.
    _SUFFIX = "Chat history:\n{chat_history}\n\n" + SUFFIX

    # We initialize our agent here, which is simply being used to decide between
    # responding with text or an image.
    agent = initialize_agent(
        llm=llm,  # our LLM (default is GPT-4 Turbo)
        tools=[generate_most_valuable_feature],
        agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,  # the agent type we're using today
        memory=memory,  # our memory!
        agent_kwargs={
            "suffix": _SUFFIX,  # adding our chat history suffix
            "input_variables": ["input", "agent_scratchpad", "chat_history"],
        },
    )

    cl.user_session.set("agent", agent)  # storing our agent in the user session

def get_memory():
    """
    This is used to track the conversation history and allow our agent to
    remember what was said before.
    """
    return ConversationBufferMemory(memory_key="chat_history")
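
# For illustration only (hypothetical turns - not executed by the app): the buffer
# created above collects each exchange under the "chat_history" key, which is what
# fills the {chat_history} slot in the prompt suffix built in setup_agent.
#
#     demo_memory = get_memory()
#     demo_memory.save_context(
#         {"input": "Please analyze my survey results."},
#         {"output": "Based on the people who increased usage, here are the most valuable features..."},
#     )
#     print(demo_memory.load_memory_variables({})["chat_history"])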

def find_most_valuable_feature(csv_file):
    print("find_most_valuable_feature")
    print(csv_file)

    smart_llm = OpenAI(api_token=os.environ["OPENAI_API_KEY"])

    # Initialize a defaultdict to store column data
    columns = defaultdict(list)

    # Read the CSV and populate the defaultdict. csv_file may be an in-memory
    # buffer (from the Chainlit upload) or a path to a file on disk.
    if hasattr(csv_file, "read"):
        raw = csv_file.read()
        text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
    else:
        with open(csv_file) as fh:
            text = fh.read()
    reader = csv.reader(StringIO(text))
    headers = next(reader)
    for row in reader:
        for header, value in zip(headers, row):
            columns[header].append(value)

    # Manually create a DataFrame from the defaultdict
    smart_df = pd.DataFrame({
        "ID": columns["ID"],
        "Date and Time": columns["Date and Time"],
        "Business Unit": columns["Business Unit"],
        "Usage Change": columns["Usage Change"],
        "Wolftech Improvement": columns["Wolftech Improvement"],
        "Likelihood to Recommend": columns["Likelihood to Recommend"],
        "Effective Training": columns["Effective Training"],
        "Most Valuable Feature": columns["Most Valuable Feature"],
    })

    smart_df = SmartDataframe(smart_df, config={"llm": smart_llm})
    # Expected to come back as a DataFrame with "Most Valuable Feature" and "Count" columns
    out = smart_df.chat('Summarize the top three "Most Valuable Feature" for people where Usage Change was Increased?')
    print(out)
    df = out

    # Plotting
    plt.figure(figsize=(10, 6))
    plt.bar(df["Most Valuable Feature"], df["Count"], color="blue")
    plt.xlabel("Most Valuable Feature")
    plt.ylabel("Count")
    plt.title("Count of Most Valuable Features")
    plt.xticks(rotation=45, ha="right")  # Rotate labels for better readability
    plt.tight_layout()  # Adjust layout for better fit

    # Save the plot to a BytesIO object
    image_buffer = BytesIO()
    plt.savefig(image_buffer, format="png")
    image_buffer.seek(0)

    return image_buffer

# This is our tool - it is what allows our agent to generate images in the first place!
# The `description` field is of utmost importance, as it is what the LLM "brain" uses
# to determine which tool to use for a given input.
generate_most_valuable_feature = Tool.from_function(
    func=find_most_valuable_feature,
    name="Find most valuable feature",
    description="Useful for finding the most valuable feature from a CSV file",
    return_direct=True,
)

def process_and_analyze_data(csv_file):
    # Read CSV file
    csv_data = pd.read_csv(csv_file)

    # Logging to check data loading
    print(f"CSV Data Loaded: {csv_data.head()}")

    # Count of responses in each category of 'Business Unit'
    business_unit_counts = csv_data["Business Unit"].value_counts()

    # Plotting the count of responses in each 'Business Unit' category
    plt.figure(figsize=(10, 6))
    business_unit_counts.plot(kind="bar")
    plt.title("Count of Responses by Business Unit")
    plt.xlabel("Business Unit")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Save the plot to a BytesIO object
    image_buffer = BytesIO()
    plt.savefig(image_buffer, format="png")
    image_buffer.seek(0)

    return image_buffer
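
# Note: process_and_analyze_data is not wired into the agent above. If desired, it could
# be exposed as a second tool using the same pattern (a sketch with a hypothetical name
# and description - not part of the original flow):
#
#     generate_business_unit_chart = Tool.from_function(
#         func=process_and_analyze_data,
#         name="Plot responses by business unit",
#         description="Useful for charting response counts per Business Unit from a CSV file",
#         return_direct=True,
#     )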

# Function to handle message events
@cl.on_message
async def handle_message(message: cl.Message):
    global got_csv

    # Retrieve the CSV file from the message
    csv_file = next(
        (
            io.BytesIO(file.content)
            for file in message.elements or []
            if file.mime and "csv" in file.mime
        ),
        None,
    )

    # Logging to check file retrieval
    print(f"CSV File: {csv_file}")

    if csv_file:
        got_csv = True
        try:
            image_buffer = find_most_valuable_feature(csv_file)

            # Get bytes data from the BytesIO object and send the image data
            image_data = image_buffer.getvalue()
            name = "chart"
            cl.user_session.set(name, image_data)
            cl.user_session.set("generated_image", name)

            await cl.Message(content="Based on the people who increased usage, here are the most valuable features...").send()

            generated_image = cl.user_session.get(name)

            agent = cl.user_session.get("agent")
            res = await cl.make_async(agent.run)(
                input=message.content, callbacks=[cl.LangchainCallbackHandler()]
            )

            elements = []
            actions = []

            elements = [
                cl.Image(
                    content=generated_image,
                    name=name,
                    display="inline",
                    size="large",
                )
            ]

            await cl.Message(content=name, elements=elements, actions=actions).send()
        except Exception as e:
            await cl.Message(content=f"An error occurred: {str(e)}").send()
    else:
        if not got_csv:
            await cl.Message(content="Please upload a CSV file.").send()
        else:
            agent = cl.user_session.get("agent")
            res = await cl.make_async(agent.run)(
                input=message.content, callbacks=[cl.LangchainCallbackHandler()]
            )
            await cl.Message(content=res).send()

# Run the Chainlit app from the command line with: chainlit run app.py