ChaRtBot / utils /helpers.py
Deepa Shalini
explore dataset feature with filtering & sorting
290ebbb
import os
import pandas as pd
from dash import html, dcc
import dash_mantine_components as dmc
# libraries to help upload files and parse the contents of the files
import io
import re
import base64
from base64 import b64encode
# libraries to help with the Dash app, layout, and callbacks
import dash_ag_grid as dag
# logging, used to report errors raised while generating charts
import logging
from utils import prompt
# Configure logging
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)
# Function to get the path of a file in the app source code
def get_app_file_path(directory_name: str, file_name: str) -> str:
    """Build the path to a file stored in a sibling directory of this module.

    The path is anchored at the directory containing this module, steps up
    one level, and then descends into ``directory_name``. It is returned
    un-normalized (the ``..`` component is kept as-is).

    Args:
        directory_name: Directory, relative to the parent of this module's
            directory, that holds the file.
        file_name: Name of the file inside ``directory_name``.

    Returns:
        The joined path as a string.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, os.pardir, directory_name, file_name)
# Function to read the content of a file
def read_doc(file_path: str) -> str:
    """Return the full text content of the file at ``file_path``.

    The file is opened in text mode with the platform default encoding.

    Args:
        file_path: Path of the file to read.

    Returns:
        The entire file content as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even when reading
    # fails part-way through (the previous open/close pair leaked it).
    with open(file_path, "r") as handle:
        return handle.read()
# Function to get the figure from the code
def get_fig_from_code(code, file_name):
    """Execute ``code`` and return the object it binds to the name ``fig``.

    Args:
        code: Python source text to execute. It is expected to assign the
            resulting figure to a top-level variable named ``fig``.
        file_name: Name of the data file the code relates to. Not used by
            this function; kept so the signature stays stable for callers.

    Returns:
        Whatever object the executed code assigned to ``fig``.

    Raises:
        KeyError: If the code ran successfully but never defined ``fig``.
        Exception: Any exception raised by the executed code propagates
            unchanged so the caller can decide how to handle it.
    """
    # SECURITY: exec() runs arbitrary code with the full privileges of this
    # process. Only pass code from a source you trust or run it sandboxed.
    #
    # A single dict is used as both globals and locals. With two separate
    # dicts the code executes as if it were a class body, so functions,
    # lambdas and other nested scopes defined by the code cannot see its own
    # top-level imports and variables and fail with NameError.
    namespace = {}
    exec(code, namespace)
    if "fig" not in namespace:
        raise KeyError("executed code did not define a variable named 'fig'")
    return namespace["fig"]
def _extract_code_block(text):
    """Return the stripped body of the first fenced code block in ``text``, or None."""
    match = re.search(r"```(?:[Pp]ython)?(.*?)```", text, re.DOTALL)
    return match.group(1).strip() if match else None


def _strip_fig_show(code):
    """Remove every line of ``code`` that consists solely of ``fig.show()``."""
    return re.sub(r'(?m)^\s*fig\.show\(\)\s*$', '', code)


def _render_figure(code, file_name):
    """Execute ``code`` and build the success tuple (graph plus base64 HTML export)."""
    fig = get_fig_from_code(code, file_name)
    buffer = io.StringIO()
    fig.write_html(buffer)
    encoded = b64encode(buffer.getvalue().encode()).decode()
    return dcc.Graph(figure=fig), None, {"display": "block"}, encoded, False


def _error_alert(message, title):
    """Build the failure tuple whose first element is a red alert."""
    alert = html.Div([
        html.Br(),
        dmc.Alert(message, title=title, color="red"),
    ])
    return alert, None, {"display": "none"}, None, False


def display_response(response, file_name):
    """Turn an LLM response containing chart code into displayable output.

    The first fenced code block in ``response`` is extracted, any standalone
    ``fig.show()`` lines are dropped, and the code is executed to obtain a
    figure. If execution fails, the code and the error text are sent to
    ``prompt.get_python_exception_response`` once and the corrected code is
    tried. Failures are logged and reported as an alert; this function does
    not raise.

    Args:
        response: Text that may contain a fenced code block.
        file_name: Name of the data file, used for logging and passed to
            ``get_fig_from_code``.

    Returns:
        A five-element tuple. On success: ``(dcc.Graph, None,
        {"display": "block"}, base64-encoded figure HTML, False)``. When no
        code block is present: ``("", None, {"display": "none"}, None,
        False)``. On failure: ``(alert Div, None, {"display": "none"}, None,
        False)``.
        NOTE(review): the tuple presumably maps onto the outputs of a Dash
        callback; confirm the order against the caller.
    """
    try:
        code_block = _extract_code_block(response)
        if code_block is None:
            return "", None, {"display": "none"}, None, False

        # fig.show() would try to open the chart outside the Dash page, so it
        # is removed. (The previous version first appended a fig.show() when
        # missing and then stripped it again, which had no effect.)
        cleaned_code = _strip_fig_show(code_block)

        try:
            return _render_figure(cleaned_code, file_name)
        except Exception as e:
            # Log the original error
            logger.error(
                "Code execution error for file '%s': %s",
                file_name, e, exc_info=True,
            )
            # Ask the LLM for corrected code and retry exactly once
            try:
                result_output = prompt.get_python_exception_response(
                    cleaned_code, str(e)
                )
                corrected_code = _extract_code_block(result_output)
                if corrected_code is None:
                    raise ValueError("No code block found in corrected response")
                return _render_figure(_strip_fig_show(corrected_code), file_name)
            except Exception as api_error:
                # Log the retry error
                logger.error(
                    "Retry failed for file '%s': %s",
                    file_name, api_error, exc_info=True,
                )
                # Show user-friendly error message
                return _error_alert(
                    "We couldn't process your request. Please try modifying your prompt or check your data format.",
                    "Unable to Generate Chart",
                )
    except Exception as e:
        # Anything unexpected outside code execution (e.g. a non-string
        # response) ends up here.
        logger.error("API error: %s", e, exc_info=True)
        # Handle API errors gracefully with user-friendly message
        return _error_alert(
            "We couldn't process your request. Please wait a moment and try again.",
            "Service Error",
        )
# Function to parse the contents of the uploaded file
def parse_contents(contents, filename):
    """Parse an uploaded file and build the components that display it.

    Args:
        contents: Upload payload of the form ``"<prefix>,<base64 data>"``.
        filename: Name of the uploaded file; used to choose the parser.

    Returns:
        On success, an ``html.Div`` holding the file name, an AG Grid of the
        data, and two ``dcc.Store`` components (``stored-data`` and
        ``stored-file-name``). If the file type is unsupported or parsing
        fails, an ``html.Div`` containing an error message.
    """
    try:
        # Only the first comma separates the prefix from the payload.
        _, content_string = contents.split(",", 1)
        decoded = base64.b64decode(content_string)

        # Decide by extension first so names such as "csv_export.xlsx" are
        # not misread; fall back to the legacy substring match for names
        # without a recognised extension. Matching is case-insensitive.
        lowered = filename.lower()
        extension = os.path.splitext(lowered)[1]
        is_csv = extension == ".csv"
        is_excel = extension.startswith(".xls")
        if not (is_csv or is_excel):
            is_csv = "csv" in lowered
            is_excel = not is_csv and "xls" in lowered

        if is_csv:
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif is_excel:
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            # Previously this case fell through and crashed on an unbound df.
            logger.error("Unsupported file type for upload '%s'", filename)
            return html.Div([
                "There was an error processing this file."
            ])
    except Exception as e:
        logger.error(
            "Failed to parse uploaded file '%s': %s", filename, e, exc_info=True
        )
        return html.Div([
            "There was an error processing this file."
        ])

    records = df.to_dict("records")
    return html.Div([
        html.H5(filename),
        dag.AgGrid(
            rowData=records,
            columnDefs=[{"field": col} for col in df.columns],
            defaultColDef={"filter": True, "sortable": True, "resizable": True},
        ),
        dcc.Store(id='stored-data', data=records),
        dcc.Store(id='stored-file-name', data=filename),
        html.Br()
    ])
# Function to save the dataframe to the current path
def save_dataframe_to_current_path(df: pd.DataFrame, filename: str) -> None:
    """Write ``df`` to ``filename`` unless a file already exists there.

    The output format is chosen from the file extension (case-insensitive):
    ``.csv`` writes CSV and any ``.xls*`` extension writes Excel. Names
    without a recognised extension fall back to the legacy substring match
    on ``csv`` / ``xls``. An existing file is never overwritten, and a name
    that matches neither format is skipped with a logged warning.

    Args:
        df: DataFrame to persist.
        filename: Target path; also determines the output format.
    """
    if os.path.exists(filename):
        return

    lowered = filename.lower()
    extension = os.path.splitext(lowered)[1]
    is_csv = extension == ".csv"
    is_excel = extension.startswith(".xls")
    if not (is_csv or is_excel):
        # Legacy behaviour: accept names that merely contain the format tag.
        is_csv = "csv" in lowered
        is_excel = not is_csv and "xls" in lowered

    if is_csv:
        df.to_csv(filename, index=False)
    elif is_excel:
        df.to_excel(filename, index=False)
    else:
        logging.getLogger(__name__).warning(
            "Not saving '%s': unsupported file type", filename
        )
def create_ag_grid(df):
    """
    Build the paginated AG Grid used to explore a dataset.

    Every column of the DataFrame becomes a grid column with filtering
    (including a floating filter), sorting and resizing switched on.

    Args:
        df: pandas DataFrame whose rows and columns populate the grid

    Returns:
        dag.AgGrid component with id "data-explorer-grid"
    """
    # Options repeated on each individual column definition.
    per_column_options = {
        "filter": True,
        "sortable": True,
        "resizable": True,
        "floatingFilter": True
    }
    column_defs = [
        {"field": column_name, **per_column_options}
        for column_name in df.columns
    ]

    # Fallback options applied by the grid to all columns.
    default_col_def = {
        "filter": True,
        "sortable": True,
        "resizable": True,
        "minWidth": 100
    }

    grid_options = {
        "pagination": True,
        "paginationPageSize": 10,
        "paginationPageSizeSelector": [10, 20, 50, 100],
        "animateRows": True
    }

    grid = dag.AgGrid(
        id="data-explorer-grid",
        rowData=df.to_dict("records"),
        columnDefs=column_defs,
        defaultColDef=default_col_def,
        dashGridOptions=grid_options,
        style={"height": "400px", "width": "100%"},
        className="ag-theme-alpine"
    )
    return grid