Spaces:

deepa-shalini
/

ChaRtBot

Sleeping

App Files Files Community

Deepa Shalini committed on Sep 5, 2024

Commit

53782c9

1 Parent(s): ccdfc4f

system prompt and helper methods for the app

Browse files

Files changed (8) hide show

assets/data_viz_best_practices.txt +21 -0
assets/example_subplots1.txt +23 -0
assets/example_subplots2.txt +37 -0
assets/example_subplots3.txt +25 -0
utils/chartbot_dataset_layout.py +57 -0
utils/components.py +6 -0
utils/helpers.py +95 -0
utils/prompt.py +122 -0

assets/data_viz_best_practices.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+Horizontal Bar Graphs for Long X-axis Labels: Use horizontal bar graphs for long X-axis labels or when the X-axis has more than 20 bars.
+Sort Bar Graphs: When the bar graphs have categorical x-axes, always sort the bar graphs in order of the y-axis values,
+placing the highest y-axis values at the top (for horizontal bars - ascending order) or the left (for vertical bars - descending order).
+Pie charts should always have a hole in them.
+Limit Donut Chart Slices: Use donut charts sparingly, with 5 or fewer slices, and set the hole size to 0.5.
+Use Consistent Intervals: Maintain consistent axis intervals to avoid misleading or confusing viewers.
+Use Line Charts for Trends: Prefer line charts for showing trends over time to effectively illustrate changes and patterns.
+Label Data Directly: Whenever possible, label data points directly on the chart to reduce the need for users to cross-reference with legends.
+Limit Dual Y-Axes: Use dual Y-axes only when absolutely necessary, ensuring both axes are clearly labeled to avoid confusion.
+Stack Bars Appropriately: Use stacked bar charts to show part-to-whole relationships, ensuring that segments are clearly distinguishable.
+Avoid Overloading Scatter Plots: Limit the number of data points in scatter plots to prevent overcrowding. Consider using heatmaps or summary statistics if necessary.
+Use Tooltips for Extra Details: Use tooltips to provide additional information without overcrowding the graph.

assets/example_subplots1.txt ADDED Viewed

	@@ -0,0 +1,23 @@

+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
# Lay out a one-row, two-column grid with a title above each cell
fig = make_subplots(rows=1, cols=2, subplot_titles=('Bar Chart', 'Line Chart'))

# Build the two traces up front
bar_trace = go.Bar(x=['A', 'B', 'C'], y=[1, 3, 2])
line_trace = go.Scatter(x=['A', 'B', 'C'], y=[2, 1, 3])

# Bar chart goes in the left cell, line chart in the right cell
fig.add_trace(bar_trace, row=1, col=1)
fig.add_trace(line_trace, row=1, col=2)

# Figure-wide title and white plot background
fig.update_layout(title_text='Multiple Visualizations in One Figure', plot_bgcolor='white')

# Render the figure
fig.show()

assets/example_subplots2.txt ADDED Viewed

	@@ -0,0 +1,37 @@

+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
# Cell types for the 2x2 grid: the pie chart sits in the 'domain' cell,
# the remaining cells hold scatter/line/bar traces
subplot_specs = [[{"type": "scatter"}, {"type": "domain"}],
                 [{"type": "scatter"}, {"type": "bar"}]]
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=('Scatter Plot', 'Pie Chart', 'Line Chart', 'Bar Chart'),
                    specs=subplot_specs)

# Each entry is (trace, row, col), listed in reading order of the grid
placements = [
    # Scatter plot (markers only)
    (go.Scatter(x=[1, 2, 3], y=[4, 5, 6], mode='markers'), 1, 1),
    # Pie chart drawn as a donut
    (go.Pie(labels=['A', 'B', 'C'], values=[10, 20, 30], hole=0.5), 1, 2),
    # Line chart
    (go.Scatter(x=[1, 2, 3], y=[6, 5, 4], mode='lines'), 2, 1),
    # Bar chart
    (go.Bar(x=['X', 'Y', 'Z'], y=[2, 3, 1]), 2, 2),
]
for trace, row, col in placements:
    fig.add_trace(trace, row=row, col=col)

# Figure-wide title and white plot background
fig.update_layout(title_text='Multiple Visualizations Example', plot_bgcolor='white')

# Render the figure
fig.show()

assets/example_subplots3.txt ADDED Viewed

	@@ -0,0 +1,25 @@

+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
# One spec per grid cell, naming the kind of trace that cell will hold
top_row_specs = [{"type": "bar"}, {"type": "barpolar"}]
bottom_row_specs = [{"type": "pie"}, {"type": "scatter3d"}]
fig = make_subplots(rows=2, cols=2, specs=[top_row_specs, bottom_row_specs])

# Top-left: bar chart
bar_trace = go.Bar(y=[2, 3, 1])
fig.add_trace(bar_trace, row=1, col=1)

# Top-right: polar bar chart
polar_trace = go.Barpolar(theta=[0, 45, 90], r=[2, 3, 1])
fig.add_trace(polar_trace, row=1, col=2)

# Bottom-left: pie chart drawn as a donut
pie_trace = go.Pie(values=[2, 3, 1], hole=0.5)
fig.add_trace(pie_trace, row=2, col=1)

# Bottom-right: 3D line
line3d_trace = go.Scatter3d(x=[2, 3, 1], y=[0, 0, 0], z=[0.5, 1, 2], mode="lines")
fig.add_trace(line3d_trace, row=2, col=2)

# Fixed figure height and white plot background
fig.update_layout(height=700, plot_bgcolor='white')
fig.show()

utils/chartbot_dataset_layout.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import dash
+import dash_ag_grid as dag
+import dash_mantine_components as dmc
+import dash_bootstrap_components as dbc
+from dash import html, dcc, callback, Input, Output, State
+from utils import components
+import pandas as pd
def chartbot_common(
        page_title: str,
        csv_file_path: str,
        starter_prompt_1_id: str,
        starter_prompt_1: str,
        starter_prompt_2_id: str,
        starter_prompt_2: str,
        prompt_textarea_id: str,
        submit_button_id: str,
        chartbot_output_id: str,
        python_content_id: str
    ) -> tuple:
    """Build the page layout shared by the dataset pages and load the dataset.

    The page is assembled top to bottom: title, data grid, two starter-prompt
    buttons, the prompt text area with its submit button, and a loading
    wrapper around the output containers.

    Args:
        page_title: Heading shown at the top of the page.
        csv_file_path: Path of the CSV file read with ``pd.read_csv``.
        starter_prompt_1_id: Component id of the first starter-prompt button.
        starter_prompt_1: Label of the first starter-prompt button.
        starter_prompt_2_id: Component id of the second starter-prompt button.
        starter_prompt_2: Label of the second starter-prompt button.
        prompt_textarea_id: Component id of the prompt text area.
        submit_button_id: Component id of the submit button.
        chartbot_output_id: Component id of the ``html.Div`` output container.
        python_content_id: Component id of the ``dcc.Markdown`` output.

    Returns:
        A ``(layout, df)`` tuple: the ``html.Div`` page layout and the
        DataFrame read from ``csv_file_path``.
    """
    df = pd.read_csv(csv_file_path)

    # Grid showing every row; all columns are filterable, sortable, resizable
    data_grid = dag.AgGrid(
        rowData=df.to_dict("records"),
        columnDefs=[{"field": col} for col in df.columns],
        defaultColDef={"filter": True, "sortable": True, "resizable": True}
    )

    # Centered row with the two starter-prompt buttons
    starter_buttons = [
        components.button_with_prompt(button_id, label)
        for button_id, label in (
            (starter_prompt_1_id, starter_prompt_1),
            (starter_prompt_2_id, starter_prompt_2),
        )
    ]
    starter_row = dmc.Group(starter_buttons, justify="center")

    # Free-text prompt entry next to its submit button
    prompt_box = dmc.Textarea(placeholder="Type the prompt here ...", id=prompt_textarea_id, size="lg", w=1470)
    submit_button = dmc.Button("Submit", id=submit_button_id, color="#E71316", className="float-end")
    prompt_row = dmc.Group([prompt_box, submit_button])

    # Output containers wrapped in a loading indicator
    output_area = dcc.Loading([
        html.Div(id=chartbot_output_id),
        dcc.Markdown(id=python_content_id)
    ], type="cube")

    page_children = [
        dmc.Title(page_title, order=1),
        html.Br(),
        data_grid,
        html.Br(),
        starter_row,
        html.Br(),
        prompt_row,
        output_area,
    ]
    layout = html.Div(page_children, style={'fontFamily': 'Helvetica'})
    return layout, df

utils/components.py CHANGED Viewed

@@ -37,3 +37,9 @@ def summary_card(
                 dmc.Button("Get Started", color="#E71316", className="float-end")
             ], style={"margin": 10})
         ], withBorder=True, radius="md", w=320)

                 dmc.Button("Get Started", color="#E71316", className="float-end")
             ], style={"margin": 10})
         ], withBorder=True, radius="md", w=320)
def button_with_prompt(
        identity: str,
        prompt: str
    ) -> dmc.Button:
    """Create an outlined gray button labelled with a starter prompt.

    Args:
        identity: Component id assigned to the button.
        prompt: Text shown on the button.

    Returns:
        The configured ``dmc.Button``.
    """
    appearance = {"color": "gray", "variant": "outline", "radius": "md"}
    return dmc.Button(prompt, id=identity, **appearance)

utils/helpers.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import os
+import pandas as pd
+from dash import html, dcc
+# libraries to help upload files and parse the contents of the files
+import io
+import re
+import base64
+# libraries to help with the Dash app, layout, and callbacks
+import dash_ag_grid as dag
+from utils import prompt
def get_app_file_path(directory_name: str, file_name: str) -> str:
    """Return the path of a file that lives in a sibling directory of this module.

    The path is built as ``<directory of this module>/../<directory_name>/<file_name>``.
    Every component is passed to ``os.path.join`` separately so the correct
    separator is used on each platform (a literal backslash is not a path
    separator on POSIX systems).

    Args:
        directory_name: Name of the directory next to this module's directory.
        file_name: Name of the file inside ``directory_name``.

    Returns:
        The joined path as a string. It is not normalized, so it still
        contains the ``..`` component.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, os.pardir, directory_name, file_name)
def read_doc(file_path: str) -> str:
    """Return the full text content of a file.

    The file is opened in a ``with`` block so the handle is closed even when
    reading raises, and it is decoded as UTF-8 explicitly so the result does
    not depend on the platform's default encoding.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The whole file content as one string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()
# Upper bound on how many times the model is asked to repair failing code
_MAX_REPAIR_ATTEMPTS = 2
# First fenced code block in a reply, with an optional python/Python tag
_CODE_BLOCK_PATTERN = re.compile(r"```(?:[Pp]ython)?(.*?)```", re.DOTALL)
# A line consisting only of fig.show()
_FIG_SHOW_PATTERN = re.compile(r'(?m)^\s*fig\.show\(\)\s*$')


def _extract_code(response):
    """Return the code of the first fenced block in *response*, or None.

    Lines that only call ``fig.show()`` are removed from the returned code.
    """
    match = _CODE_BLOCK_PATTERN.search(response)
    if match is None:
        return None
    return _FIG_SHOW_PATTERN.sub('', match.group(1).strip())


def _run_code(code):
    """Execute *code* and return the object it bound to the name ``fig``."""
    # SECURITY: this executes model-generated code with full interpreter
    # privileges; it is not sandboxed.
    # A single namespace is used for globals and locals so that functions
    # defined by the code can see the names the code imports or assigns.
    namespace = {}
    exec(code, namespace)
    if "fig" not in namespace:
        raise NameError("the code must assign the figure to a variable named 'fig'")
    return namespace["fig"]


def _figure_with_repair(code, response):
    """Run *code*; on failure ask the model for a fix, a bounded number of times.

    Returns ``(fig, response)`` where ``response`` is the reply whose code
    produced ``fig``, or ``(None, last_response)`` when no attempt succeeded.
    """
    for attempts_left in range(_MAX_REPAIR_ATTEMPTS, -1, -1):
        try:
            return _run_code(code), response
        except Exception as e:
            if attempts_left == 0:
                break
            response = prompt.get_python_exception_response(code, str(e))
            code = _extract_code(response)
            if code is None:
                break
    return None, response


def get_fig_from_code(code, file_name):
    """Execute generated plotting code and return the figure it creates.

    If the code raises, or does not define ``fig``, the model is asked to
    rewrite it and the rewritten code is tried instead, at most
    ``_MAX_REPAIR_ATTEMPTS`` times.

    Args:
        code: Python source expected to assign the figure to ``fig``.
        file_name: Name of the data file; accepted for interface
            compatibility and not used here.

    Returns:
        The object bound to ``fig``, or ``None`` if no attempt succeeded.
    """
    fig, _ = _figure_with_repair(code, "")
    return fig


def display_response(response, file_name):
    """Turn a model reply into a graph component plus the reply text.

    Args:
        response: Reply text that may contain a fenced Python code block.
        file_name: Name of the data file; accepted for interface
            compatibility and not used here.

    Returns:
        ``(dcc.Graph, reply)`` when a figure was produced, where ``reply`` is
        the reply whose code produced it (the original one, or a repaired one
        from the model). ``("", reply)`` when the reply has no code block or
        no attempt produced a figure.
    """
    code = _extract_code(response)
    if code is None:
        return "", response
    fig, response = _figure_with_repair(code, response)
    if fig is None:
        return "", response
    return dcc.Graph(figure=fig), response
def parse_contents(contents, filename):
    """Parse an uploaded CSV or Excel file into a preview with stored data.

    The file type is taken from the (case-insensitive) extension of
    ``filename``; when the extension is not ``.csv``, ``.xls`` or ``.xlsx``
    the name is searched for ``csv`` / ``xls`` as a fallback.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file.

    Returns:
        An ``html.Div`` holding the file name, an AG Grid of the rows, and two
        ``dcc.Store`` components (``stored-data`` with the records and
        ``stored-file-name`` with the name). If the payload is malformed, the
        file type is unsupported, or parsing fails, an ``html.Div`` with an
        error message is returned instead.
    """
    name = os.path.basename(filename).lower()
    extension = os.path.splitext(name)[1]
    excel_extensions = (".xls", ".xlsx")
    is_csv = extension == ".csv" or (extension not in excel_extensions and 'csv' in name)
    is_excel = extension in excel_extensions or (not is_csv and 'xls' in name)

    try:
        # Split on the first comma only: everything after it is the payload
        _, content_string = contents.split(",", 1)
        decoded = base64.b64decode(content_string)
        if is_csv:
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif is_excel:
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            # Without this branch ``df`` would be unbound further down
            raise ValueError("Unsupported file type: {}".format(filename))
    except Exception as e:
        print(e)
        return html.Div([
            "There was an error processing this file."
        ])

    records = df.to_dict("records")
    return html.Div([
        html.H5(filename),
        dag.AgGrid(
            rowData=records,
            columnDefs=[{"field": col} for col in df.columns],
            defaultColDef={"filter": True, "sortable": True, "resizable": True},
        ),
        dcc.Store(id='stored-data', data=records),
        dcc.Store(id='stored-file-name', data=filename),
        html.Hr()
    ])
def save_dataframe_to_current_path(df: pd.DataFrame, filename: str) -> None:
    """Write *df* to *filename* as CSV or Excel unless the file already exists.

    The format is taken from the (case-insensitive) extension of the file
    name; when the extension is not ``.csv``, ``.xls`` or ``.xlsx`` the base
    name is searched for ``csv`` / ``xls`` as a fallback. Only the base name
    is inspected, so directory names never influence the chosen format.
    Nothing is written when no format can be determined.

    Args:
        df: DataFrame to save (written without the index).
        filename: Target path; an existing file is left untouched.
    """
    if os.path.exists(filename):
        return

    name = os.path.basename(filename).lower()
    extension = os.path.splitext(name)[1]
    excel_extensions = (".xls", ".xlsx")
    is_csv = extension == ".csv" or (extension not in excel_extensions and 'csv' in name)
    is_excel = extension in excel_extensions or (not is_csv and 'xls' in name)

    if is_csv:
        df.to_csv(filename, index=False)
    elif is_excel:
        df.to_excel(filename, index=False)

utils/prompt.py ADDED Viewed

	@@ -0,0 +1,122 @@

+# libraries to help with the environment variables
+import os
+from dotenv import load_dotenv
+# libraries to help with the AI model
+from langchain_openai import AzureChatOpenAI
+from langchain_core.messages import HumanMessage
+from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+from utils import helpers
# Load the .env file, then read the Azure OpenAI settings from the environment
load_dotenv()
AZURE_KEY = os.environ.get('KEY')
AZURE_ENDPOINT = os.environ.get('LLM_ENDPOINT')
AZURE_NAME = os.environ.get('LLM_DEPLOYMENT_NAME')
AZURE_VERSION = os.environ.get('VERSION')

# Chat model client shared by every request in this module
_azure_settings = {
    "deployment_name": AZURE_NAME,
    "openai_api_version": AZURE_VERSION,
    "openai_api_key": AZURE_KEY,
    "azure_endpoint": AZURE_ENDPOINT,
}
llm = AzureChatOpenAI(**_azure_settings)

# NOTE: the literal below is a bare expression statement; it is not assigned
# or referenced anywhere in the visible code.
'''Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
            handle missing values appropriately based on the context, ensuring cleaner visualizations.
            For example, use df.dropna(subset=[column_name]) for data cleaning. Never use this statement: df.dropna(inplace=True).'''
def get_prompt_text() -> str:
    """Return the system-prompt template for chart generation.

    The template contains these placeholders, which the caller must fill:
    ``{data_visualization_best_practices}``, ``{example_subplots1}``,
    ``{example_subplots2}``, ``{example_subplots3}``, ``{name_of_file}`` and
    ``{data}``.

    Returns:
        The prompt template as a single string.
    """
    # The make_subplots example must be valid Python (both quotes closed) and
    # must name plotly.subplots as the module, matching the import line given
    # just above it in the prompt.
    return """You are a data visualization expert and you only use the graphing library Plotly.
            Ensure that before performing any data manipulation or plotting, the code checks for column data types and converts them if necessary.
            For example, numeric columns should be converted to floats or integers using pd.to_numeric(), and non-numeric columns should be excluded from numeric operations.
            Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
            handle missing values appropriately based on the context, ensuring cleaner visualizations.
            For example, use df.dropna(subset=[column_name]) for data cleaning. Never use this statement: df.dropna(inplace=True).
            The graphs you plot shall always have a white background and shall follow data visualization best practices.
            Do not ignore any of the following visualization best practices:
            {data_visualization_best_practices}
            If the user requests a single visualization, create the graph using the plotly.express library and set the fig height to 800.
            Ensure that the graph is clearly labeled with a title, x-axis label, y-axis label, and legend.
            If the user has requested for a choropleth map of the United States of America (USA), ensure that the locations parameter in the px.choropleth() method is
            set to the column which contains the two letter code state abbreviations, for example: AL, NY, TN, VT, UT (the column should not be determined by the name of the column,
            but by the values it contains) and the scope parameter is set to 'usa'.
            If the user requests multiple visualizations, create a subplot for each visualization.
            The libraries required for multiple visualizations are: import plotly.graph_objects as go and from plotly.subplots import make_subplots.
            Utilize the plotly.subplots module's make_subplots() function to create subplots, specifying the number of rows and columns,
            and the specs parameter to define what type of graph will be present in each subplot to accommodate all requested visualizations.
            Then, use the add_trace() method to add each graph to the appropriate subplot.
            When generating subplots that include pie charts and xy plots (like bar or scatter), ensure that pie charts are assigned a separate 'domain' subplot type.
            Use the make_subplots() function with the specs argument correctly set for pie charts and other plots.
            For example, use make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='xy')]]) for a pie chart and a bar plot.
            Before returning the final code, verify that all trace types are compatible with the assigned subplot types,
            particularly ensuring that pie charts are in domain-type subplots. If an error is detected, correct the subplot type automatically.
            Validate the layout before adding traces.
            Ensure each subplot is clearly labeled and formatted according to best practices.
            All the labels in the graph should be of the font family Helvetica, be it title, x-axis, y-axis, or legend.
            Here are examples of how to create multiple visualizations in a single figure:
            Example 1: \n
            {example_subplots1}
            Example 2: \n
            {example_subplots2}
            Example 3: \n
            {example_subplots3}
            The height of the figure (fig) should be set to 800.
            Suppose that the data is provided as a {name_of_file} file.
            Here are the first 5 rows of the data set: {data}. Follow the user's indications when creating the graph.
            There should be no natural language text in the python code block."""
def get_response(user_input: str, data_top5_csv_string: str, file_name: str) -> str:
    """Ask the chat model for plotting code that answers the user's request.

    The system prompt from ``get_prompt_text()`` is filled with the best
    practices and example files read from the ``assets`` directory, the data
    sample and the file name; the user's request is sent as a human message.

    Args:
        user_input: The user's chart request.
        data_top5_csv_string: Data sample substituted for ``{data}``.
        file_name: File name substituted for ``{name_of_file}``.

    Returns:
        The ``content`` of the model's reply.
    """
    chat_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", get_prompt_text()),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    chain = chat_prompt | llm

    # Template variable -> asset file whose text fills it
    asset_files = {
        "data_visualization_best_practices": "data_viz_best_practices.txt",
        "example_subplots1": "example_subplots1.txt",
        "example_subplots2": "example_subplots2.txt",
        "example_subplots3": "example_subplots3.txt",
    }
    template_values = {
        variable: helpers.read_doc(helpers.get_app_file_path("assets", asset_file))
        for variable, asset_file in asset_files.items()
    }
    template_values["messages"] = [HumanMessage(content=user_input)]
    template_values["data"] = data_top5_csv_string
    template_values["name_of_file"] = file_name

    response = chain.invoke(template_values)
    return response.content
def get_python_exception_prompt_text() -> str:
    """Return the system-prompt template used when generated code fails.

    The template has two placeholders: ``{code}`` and ``{exception}``.
    """
    template = """The Python code you provided {code} has an error {exception}"""
    return template
def get_python_exception_response(code: str, exception: str) -> str:
    """Ask the chat model to rewrite code that raised an exception.

    The system prompt from ``get_python_exception_prompt_text()`` is filled
    with the failing code and the exception text, and a fixed human message
    asks for a complete, error-free rewrite.

    Args:
        code: The Python source that failed.
        exception: Text of the exception that was raised.

    Returns:
        The ``content`` of the model's reply.
    """
    repair_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", get_python_exception_prompt_text()),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    chain = repair_prompt | llm
    rewrite_request = HumanMessage(content="Rewrite the entire Python code so that it does not contain any errors. The code should be able to run without any errors.")
    response = chain.invoke(
        {
            "messages": [rewrite_request],
            "code": code,
            "exception": exception,
        }
    )
    return response.content