Spaces:

deepa-shalini
/

ChaRtBot

Sleeping

App Files Files Community

Deepa Shalini committed on Jan 7

Commit

d04a737

1 Parent(s): 723862f

support for dumbbell, choropleth and polar charts

Browse files

Files changed (7) hide show

app.py +3 -3
assets/example_dumbbell_chart.txt +61 -0
assets/example_polar_bar.txt +121 -0
assets/example_polar_scatter.txt +104 -0
data/polar_bar_data.csv +3 -0
utils/helpers.py +5 -1
utils/prompt.py +137 -23

app.py CHANGED Viewed

@@ -37,8 +37,8 @@ app.layout = dmc.MantineProvider(
                                     className="brand"
                                 ),
                                 html.Button(
-                                    "New Chat",
-                                    id="new-chat-button",
                                     className="pill",
                                     n_clicks=0,
                                     style={
@@ -281,7 +281,7 @@ def download_html(encoded):
     Output("html-buffer", "data", allow_duplicate=True),
     Output("submit-button", "disabled", allow_duplicate=True),
     Output("upload-data", "contents"),
-    Input("new-chat-button", "n_clicks"),
     prevent_initial_call=True
 )
 def reset_chat(n_clicks):

                                     className="brand"
                                 ),
                                 html.Button(
+                                    "New Chart",
+                                    id="new-chart-button",
                                     className="pill",
                                     n_clicks=0,
                                     style={
     Output("html-buffer", "data", allow_duplicate=True),
     Output("submit-button", "disabled", allow_duplicate=True),
     Output("upload-data", "contents"),
+    Input("new-chart-button", "n_clicks"),
     prevent_initial_call=True
 )
 def reset_chat(n_clicks):

assets/example_dumbbell_chart.txt ADDED Viewed

	@@ -0,0 +1,61 @@

import plotly.graph_objects as go
import pandas as pd

# Dumbbell chart example: one row per country, with a marker for each of two
# years and an arrow-tipped connector running from the first year to the second.

# Sample data for dumbbell chart
countries = ['Country A', 'Country B', 'Country C', 'Country D', 'Country E']
year_1952 = [65, 68, 70, 72, 75]
year_2002 = [72, 76, 78, 80, 82]

# Prepare line coordinates for the connectors. All segments live in a single
# trace; the None entry after each (start, end) pair separates the segments so
# one country's connector is not joined to the next country's.
line_x = []
line_y = []
for country, start_value, end_value in zip(countries, year_1952, year_2002):
    line_x.extend([start_value, end_value, None])
    line_y.extend([country, country, None])

# Create dumbbell chart
fig = go.Figure(
    data=[
        # Connecting lines. The arrow markers are oriented relative to the
        # previous point (angleref) and pulled back from the end point
        # (standoff) so they do not sit on top of the year markers.
        go.Scatter(
            x=line_x,
            y=line_y,
            mode='markers+lines',
            showlegend=False,
            marker=dict(
                symbol="arrow",
                color="black",
                size=16,
                angleref="previous",
                standoff=8
            )
        ),
        # First year markers
        go.Scatter(
            x=year_1952,
            y=countries,
            mode='markers',
            name='1952',
            marker=dict(color='green', size=10)
        ),
        # Second year markers
        go.Scatter(
            x=year_2002,
            y=countries,
            mode='markers',
            name='2002',
            marker=dict(color='blue', size=10)
        ),
    ]
)

# Update layout. Axis titles are set explicitly so the example itself shows
# the labelled-axes requirement that generated charts are expected to meet.
fig.update_layout(
    title='Comparison Between Two Years',
    xaxis_title='Value',
    yaxis_title='Country',
    height=800,
    plot_bgcolor='white',
    legend_itemclick=False  # clicking a legend entry does not toggle its trace
)

# Show the figure
fig.show()

assets/example_polar_bar.txt ADDED Viewed

	@@ -0,0 +1,121 @@

import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Polar "calendar" example: 12 angular sectors (months) x 7 rings (weekdays),
# each cell coloured by the quantile bin of its aggregated value.

# Sample data for demonstration (full year 2024). 2024 is a leap year, so the
# number of days is taken from the date range instead of hard-coding 365,
# which would stop at Dec 30.
dates = pd.date_range('2024-01-01', '2024-12-31', freq='D')
df = pd.DataFrame({
    'date': dates,
    'value': np.random.randint(50, 200, len(dates)),
})

# Extract calendar components
df['month'] = df['date'].dt.month  # 1..12
# Convert pandas weekday (Monday=0..Sunday=6) to Sun=0..Sat=6
df['weekday_sun0'] = (df['date'].dt.dayofweek + 1) % 7

# Aggregate values by month x weekday
agg = (
    df.groupby(['month', 'weekday_sun0'], as_index=False)['value']
    .sum()
    .rename(columns={'value': 'total_value'})
)

# Ensure all 12x7 cells exist (fill missing with 0)
full = pd.MultiIndex.from_product(
    [range(1, 13), range(0, 7)],
    names=['month', 'weekday_sun0']
).to_frame(index=False)
agg = full.merge(agg, on=['month', 'weekday_sun0'], how='left').fillna({'total_value': 0.0})

# Labels for months and weekdays
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
weekday_labels = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
agg['month_name'] = agg['month'].map(lambda m: month_labels[m-1])
agg['weekday_name'] = agg['weekday_sun0'].map(lambda w: weekday_labels[w])

# Polar "cell" geometry
# Each month occupies a 30-degree sector (360/12 = 30)
month_width = 360 / 12
agg['theta'] = (agg['month'] - 1) * month_width  # 0, 30, 60, ..., 330 (Jan at 0)
agg['width'] = month_width  # sector width
agg['base'] = agg['weekday_sun0']  # ring start (0..6)
agg['r'] = 1  # ring thickness (each weekday is one ring)

# Color palette (5 colors) for the quantile bins, plus a separate neutral
# colour for empty cells. Keeping the zero bin out of the palette guarantees
# every bin has a colour even when a zero bin and five quantile bins coexist.
palette = ['#edf8fb', '#b2e2e2', '#66c2a4', '#2ca25f', '#006d2c']
zero_color = '#f0f0f0'

# Bin values into (up to) 5 quantile categories for color coding
s = agg['total_value'].astype(float)
nonzero = s[s > 0]
if nonzero.empty:
    agg['bin'] = 'All zero'
    bin_labels = ['All zero']
    color_map = {'All zero': palette[0]}
else:
    # Quantile binning on non-zero values; duplicates='drop' may yield fewer
    # than 5 bins when quantile edges coincide.
    binned_nz = pd.qcut(nonzero, q=5, duplicates='drop')
    intervals = binned_nz.cat.categories
    quantile_labels = [f'{iv.left:,.0f}–{iv.right:,.0f}' for iv in intervals]
    # Map each value to its label through the categorical codes rather than
    # through the string form of the interval.
    codes = binned_nz.cat.codes.to_numpy()
    agg['bin'] = '0'  # default for zeros
    agg.loc[nonzero.index, 'bin'] = [quantile_labels[c] for c in codes]
    bin_labels = ['0'] + quantile_labels
    color_map = {'0': zero_color}
    color_map.update(zip(quantile_labels, palette))

# Keep only bins that actually occur, in legend order, without repeats
present_bins = set(agg['bin'])
unique_bins = [b for b in dict.fromkeys(bin_labels) if b in present_bins]

# Build figure with one Barpolar trace per bin
fig = go.Figure()
for b in unique_bins:
    sub = agg[agg['bin'] == b]
    fig.add_trace(go.Barpolar(
        theta=sub['theta'],
        r=sub['r'],
        base=sub['base'],
        width=sub['width'],
        name=b,
        marker_color=color_map[b],
        marker_line_width=0,  # removes gaps between cells
        hovertemplate=(
            'Month: %{customdata[0]}<br>'
            'Weekday: %{customdata[1]}<br>'
            'Value: %{customdata[2]:,.2f}<extra></extra>'
        ),
        customdata=sub[['month_name', 'weekday_name', 'total_value']].to_numpy(),
    ))

# Radial ticks placed at ring centers (0.5..6.5)
tickvals = [i + 0.5 for i in range(7)]
fig.update_layout(
    title='Circular Calendar View - Monthly Values by Weekday (2024)',
    template='plotly_white',
    margin=dict(l=40, r=40, t=70, b=40),
    polar=dict(
        angularaxis=dict(
            direction='clockwise',
            rotation=90,  # puts theta=0 (Jan) at top
            tickmode='array',
            tickvals=[i * month_width for i in range(12)],
            ticktext=month_labels,
        ),
        radialaxis=dict(
            range=[0, 7],
            tickmode='array',
            tickvals=tickvals,
            ticktext=weekday_labels,  # Sun..Sat
            showline=False,
            gridcolor='rgba(0,0,0,0.12)',
        ),
    ),
    legend_title_text='Value (binned)',
)
fig.show()

assets/example_polar_scatter.txt ADDED Viewed

	@@ -0,0 +1,104 @@

import pandas as pd
import numpy as np
import plotly.express as px

# Polar "calendar" example drawn as bubbles: months around the circle,
# weekdays as rings, bubble size and colour driven by the aggregated value.

# Sample data for demonstration (full year 2024). 2024 is a leap year, so the
# number of days is taken from the date range instead of hard-coding 365,
# which would stop at Dec 30.
dates = pd.date_range('2024-01-01', '2024-12-31', freq='D')
df = pd.DataFrame({
    'date': dates,
    'value': np.random.randint(50, 200, len(dates)),
})

# Extract calendar components
df['month'] = df['date'].dt.month  # 1..12
# Convert pandas weekday (Monday=0..Sunday=6) to Sun=0..Sat=6
df['weekday_sun0'] = (df['date'].dt.dayofweek + 1) % 7

# Aggregate values by month x weekday
agg = (
    df.groupby(['month', 'weekday_sun0'], as_index=False)['value']
    .sum()
    .rename(columns={'value': 'total_value'})
)

# Ensure all 12x7 cells exist (fill missing with 0)
full = pd.MultiIndex.from_product(
    [range(1, 13), range(0, 7)],
    names=['month', 'weekday_sun0']
).to_frame(index=False)
agg = full.merge(agg, on=['month', 'weekday_sun0'], how='left').fillna({'total_value': 0.0})

# Labels for months and weekdays
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
weekday_labels = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
agg['month_name'] = agg['month'].map(lambda m: month_labels[m-1])
agg['weekday_name'] = agg['weekday_sun0'].map(lambda w: weekday_labels[w])

# Rings: 1..7 (Sun=1 inner → Sat=7 outer)
agg['r'] = agg['weekday_sun0'] + 1

# Bubble size normalization (log1p compresses large values; the result is then
# rescaled into the [min_marker_px, max_marker_px] interval)
max_marker_px = 42
min_marker_px = 6
s = agg['total_value'].to_numpy(dtype=float)
s_log = np.log1p(s)
log_min = s_log.min()
log_max = s_log.max()
if np.allclose(log_max, log_min):
    # Every cell has (almost) the same value: avoid dividing by ~0 and give
    # all bubbles the minimum size.
    agg['size_px'] = min_marker_px
else:
    scaled = (s_log - log_min) / (log_max - log_min)
    agg['size_px'] = min_marker_px + scaled * (max_marker_px - min_marker_px)

# Sizeref for area sizing
sizeref = 2.0 * agg['size_px'].max() / (max_marker_px ** 2)

# Build polar scatter chart
fig = px.scatter_polar(
    agg,
    r='r',
    theta='month_name',
    size='size_px',
    size_max=max_marker_px,
    color='total_value',
    color_continuous_scale='Viridis',
    # Show the human-readable fields on hover and hide the helper columns
    hover_data={
        'month_name': True,
        'weekday_name': True,
        'total_value': ':,.2f',
        'r': False,
        'size_px': False,
        'month': False,
        'weekday_sun0': False,
    },
    title='Circular Calendar View - Monthly Values by Weekday (2024)',
)

# Force area sizing behavior
fig.update_traces(marker=dict(sizemode='area', sizeref=sizeref, line=dict(width=0.6)))

# Clockwise months, start Jan at top
fig.update_layout(
    polar=dict(
        angularaxis=dict(
            direction='clockwise',
            rotation=90,  # puts Jan at the top
        ),
        radialaxis=dict(
            tickmode='array',
            tickvals=list(range(1, 8)),
            ticktext=weekday_labels,  # Sun..Sat
            range=[0.5, 7.5],
            showline=False,
            gridcolor='rgba(0,0,0,0.12)',
        ),
    ),
    coloraxis_colorbar=dict(title='Value'),
    template='plotly_white',
    margin=dict(l=40, r=40, t=70, b=40),
    height=800,
)
fig.show()

data/polar_bar_data.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:862aa7ccd01dc0f0a2226b8ca69ae4d3d3b7f0eab93fc2fc359082c0bc228d25
+size 1123

utils/helpers.py CHANGED Viewed

@@ -43,10 +43,14 @@ def get_fig_from_code(code, file_name):
 def display_response(response, file_name):
     try:
         code_block_match = re.search(r"```(?:[Pp]ython)?(.*?)```", response, re.DOTALL)
-        #print(code_block_match)
         if code_block_match:
             code_block = code_block_match.group(1).strip()
             cleaned_code = re.sub(r'(?m)^\s*fig\.show\(\)\s*$', '', code_block)
             try:

 def display_response(response, file_name):
     try:
         code_block_match = re.search(r"```(?:[Pp]ython)?(.*?)```", response, re.DOTALL)
         if code_block_match:
             code_block = code_block_match.group(1).strip()
+            # Check if code ends with fig.show() and add it if missing
+            if not re.search(r'fig\.show\(\)\s*$', code_block, re.MULTILINE):
+                code_block = code_block + "\nfig.show()"
             cleaned_code = re.sub(r'(?m)^\s*fig\.show\(\)\s*$', '', code_block)
             try:

utils/prompt.py CHANGED Viewed

@@ -61,6 +61,14 @@ def get_prompt_text() -> str:
             If any validation rule fails, return ONLY the error message in the format specified above. Do NOT generate any Python code.
             IF VALIDATION PASSES, PROCEED WITH CODE GENERATION:
             Ensure that before performing any data manipulation or plotting, the code checks for column data types and converts them if necessary.
             For example, numeric columns should be converted to floats or integers using pd.to_numeric(), and non-numeric columns should be excluded from numeric operations.
             Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
@@ -71,9 +79,28 @@ def get_prompt_text() -> str:
             {data_visualization_best_practices}
             If the user requests a single visualization, figure height to 800.
             Ensure that the graph is clearly labeled with a title, x-axis label, y-axis label, and legend.
-            If the user has requested for a choropleth map of the United States of America (USA), ensure that the locations parameter in the px.choropleth() method is
-            set to the column which contains the two letter code state abbreviations, for example: AL, NY, TN, VT, UT (the column should not be determined by the name of the column,
-            but by the values it contains) and the scope parameter is set to 'usa'.
             If the user requests multiple visualizations, create a subplot for each visualization.
             The libraries required for multiple visualizations are: import plotly.graph_objects as go and from plotly.subplots import make_subplots.
             Utilize the plotly.graph_objects library's make_subplots() method to create subplots, specifying the number of rows and columns,
@@ -96,7 +123,44 @@ def get_prompt_text() -> str:
             The height of the figure (fig) should be set to 800.
             Suppose that the data is provided as a {name_of_file} file.
             Here are the first 5 rows of the data set: {data}. Follow the user's indications when creating the graph.
-            There should be no natural language text in the python code block."""
 def get_response(user_input: str, data_top5_csv_string: str, file_name: str) -> str:
     """
@@ -114,6 +178,54 @@ def get_response(user_input: str, data_top5_csv_string: str, file_name: str) ->
         Exception: If API call fails or validation fails
     """
     try:
         prompt = ChatPromptTemplate.from_messages(
             [
                 ("system", get_prompt_text()),
@@ -123,25 +235,27 @@ def get_response(user_input: str, data_top5_csv_string: str, file_name: str) ->
         chain = prompt | llm
-        response = chain.invoke(
-            {
-                "messages": [HumanMessage(content=user_input)],
-                "data_visualization_best_practices": helpers.read_doc(
-                    helpers.get_app_file_path("assets", "data_viz_best_practices.txt")
-                ),
-                "example_subplots1": helpers.read_doc(
-                    helpers.get_app_file_path("assets", "example_subplots1.txt")
-                ),
-                "example_subplots2": helpers.read_doc(
-                    helpers.get_app_file_path("assets", "example_subplots2.txt")
-                ),
-                "example_subplots3": helpers.read_doc(
-                    helpers.get_app_file_path("assets", "example_subplots3.txt")
-                ),
-                "data": data_top5_csv_string,
-                "name_of_file": file_name
-            }
-        )
         # Check if the response is an error message instead of code
         response_text = response.content.strip()

             If any validation rule fails, return ONLY the error message in the format specified above. Do NOT generate any Python code.
             IF VALIDATION PASSES, PROCEED WITH CODE GENERATION:
+            PANDAS DATA HANDLING BEST PRACTICES:
+            - Always use .copy() when creating a new dataframe from a subset or filtered view to avoid SettingWithCopyWarning.
+            - Example: df_filtered = df[df['column'] > 0].copy()
+            - When modifying data, always work on explicit copies, not chained indexing.
+            - Use .loc[] for setting values: df.loc[condition, 'column'] = value
+            - Avoid chained assignment like df[condition]['column'] = value
             Ensure that before performing any data manipulation or plotting, the code checks for column data types and converts them if necessary.
             For example, numeric columns should be converted to floats or integers using pd.to_numeric(), and non-numeric columns should be excluded from numeric operations.
             Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
             {data_visualization_best_practices}
             If the user requests a single visualization, figure height to 800.
             Ensure that the graph is clearly labeled with a title, x-axis label, y-axis label, and legend.
+            SPECIFIC CHART TYPE INSTRUCTIONS:
+            CHOROPLETH MAPS:
+            CRITICAL: When creating a choropleth map of the United States, you MUST include ALL of the following parameters:
+            - locations: Set to the column containing two-letter state abbreviations (e.g., 'AL', 'NY', 'CA', 'TX')
+            - locationmode: MUST be set to 'USA-states' (this is CRITICAL - without it, the map will be blank)
+            - scope: Set to 'usa'
+            Example:
+            fig = px.choropleth(df,
+                              locations='state_code_column',
+                              locationmode='USA-states',
+                              scope='usa',
+                              color='value_column',
+                              title='Map Title')
+            The locations parameter should reference the column with state codes, not the column with full state names.
+            Always verify that locationmode='USA-states' is present in the code.
+            {dumbbell_charts_section}
+            {polar_charts_section}
             If the user requests multiple visualizations, create a subplot for each visualization.
             The libraries required for multiple visualizations are: import plotly.graph_objects as go and from plotly.subplots import make_subplots.
             Utilize the plotly.graph_objects library's make_subplots() method to create subplots, specifying the number of rows and columns,
             The height of the figure (fig) should be set to 800.
             Suppose that the data is provided as a {name_of_file} file.
             Here are the first 5 rows of the data set: {data}. Follow the user's indications when creating the graph.
+            There should be no natural language text in the python code block.
+            REMINDER: Your code MUST end with fig.show() to display the visualization."""
+def _should_include_dumbbell_examples(user_input: str) -> bool:
+    """
+    Check if user's request is about dumbbell charts or comparison visualizations.
+    Args:
+        user_input: User's visualization request
+    Returns:
+        bool: True if dumbbell chart examples should be included
+    """
+    dumbbell_keywords = [
+        'dumbbell', 'dumb bell', 'dumbell', 'dumbel', 'comparison', 'before and after', 'before after',
+        'start and end', 'start end', 'range', 'difference', 'gap', 'change over'
+    ]
+    user_input_lower = user_input.lower()
+    return any(keyword in user_input_lower for keyword in dumbbell_keywords)
+def _should_include_polar_examples(user_input: str) -> bool:
+    """
+    Check if user's request is about polar charts, calendar views, or circular visualizations.
+    Args:
+        user_input: User's visualization request
+    Returns:
+        bool: True if polar chart examples should be included
+    """
+    polar_keywords = [
+        'polar', 'circular', 'radial', 'circular fashion', 'radar', 'rose'
+    ]
+    user_input_lower = user_input.lower()
+    return any(keyword in user_input_lower for keyword in polar_keywords)
 def get_response(user_input: str, data_top5_csv_string: str, file_name: str) -> str:
     """
         Exception: If API call fails or validation fails
     """
     try:
+        # Determine if dumbbell chart examples should be included
+        include_dumbbell = _should_include_dumbbell_examples(user_input)
+        # Determine if polar chart examples should be included
+        include_polar = _should_include_polar_examples(user_input)
+        # Build dumbbell charts section conditionally
+        dumbbell_charts_section = ""
+        if include_dumbbell:
+            dumbbell_example = helpers.read_doc(
+                helpers.get_app_file_path("assets", "example_dumbbell_chart.txt")
+            )
+            dumbbell_charts_section = f"""
+            DUMBBELL PLOTS:
+            When creating a dumbbell plot, use plotly.graph_objects (go) instead of plotly.express (px).
+            Use go.Figure() and add two go.Scatter traces for the two data points, and a go.Scatter trace for the lines connecting them.
+            Ensure proper labeling of axes and title for clarity.
+            Example: \n
+            {dumbbell_example}
+            """
+        # Build polar charts section conditionally
+        polar_charts_section = ""
+        if include_polar:
+            polar_bar_example = helpers.read_doc(
+                helpers.get_app_file_path("assets", "example_polar_bar.txt")
+            )
+            polar_scatter_example = helpers.read_doc(
+                helpers.get_app_file_path("assets", "example_polar_scatter.txt")
+            )
+            polar_charts_section = f"""
+            POLAR CHARTS (RADIAL/CIRCULAR VISUALIZATIONS):
+            Polar charts are effective for displaying calendar views, weekly patterns, or circular data distributions.
+            Use them for innovative visualizations of time-based or cyclical data.
+            Example 1 - Polar Calendar with Cells (Barpolar):
+            {polar_bar_example}
+            Example 2 - Polar Calendar with Scatter:
+            {polar_scatter_example}
+            Use polar charts when the user requests:
+            - Calendar-like views
+            - Weekly or cyclical patterns
+            - Circular representations of data
+            - Radial visualizations
+            """
         prompt = ChatPromptTemplate.from_messages(
             [
                 ("system", get_prompt_text()),
         chain = prompt | llm
+        invoke_params = {
+            "messages": [HumanMessage(content=user_input)],
+            "data_visualization_best_practices": helpers.read_doc(
+                helpers.get_app_file_path("assets", "data_viz_best_practices.txt")
+            ),
+            "example_subplots1": helpers.read_doc(
+                helpers.get_app_file_path("assets", "example_subplots1.txt")
+            ),
+            "example_subplots2": helpers.read_doc(
+                helpers.get_app_file_path("assets", "example_subplots2.txt")
+            ),
+            "example_subplots3": helpers.read_doc(
+                helpers.get_app_file_path("assets", "example_subplots3.txt")
+            ),
+            "dumbbell_charts_section": dumbbell_charts_section,
+            "polar_charts_section": polar_charts_section,
+            "data": data_top5_csv_string,
+            "name_of_file": file_name
+        }
+        response = chain.invoke(invoke_params)
         # Check if the response is an error message instead of code
         response_text = response.content.strip()