Spaces:

CatLLM
/

survey-classifier

Running

chrissoria and Claude committed 11 days ago

Commit

f3a50f8

1 Parent(s): 046b57c

Switch to matplotlib for category distribution plot

- Replace gr.BarPlot with gr.Plot using matplotlib
- Make the plot fill its space, with a dynamic height based on the number of categories plotted
- Add percentage labels on bars
- Show the first category at the top of the chart

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (3) hide show

__pycache__/app.cpython-311.pyc +0 -0
app.py +28 -11
requirements.txt +1 -0

__pycache__/app.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import os
 import time
 import sys
 from datetime import datetime
 # Import catllm
 try:
@@ -561,7 +562,7 @@ Provide your work in JSON format where the number belonging to each category is
             python_version=python_version
         )
-        # Build distribution summary DataFrame for bar plot (percentages)
         dist_data = []
         total_rows = len(result)
         for i, cat in enumerate(categories, 1):
@@ -573,7 +574,28 @@ Provide your work in JSON format where the number belonging to each category is
                     "Category": cat,
                     "Percentage": round(pct, 1)
                 })
-        distribution_df = pd.DataFrame(dist_data)
         # Build sample results DataFrame (first 5 rows)
         sample_data = []
@@ -590,9 +612,9 @@ Provide your work in JSON format where the number belonging to each category is
             })
         sample_df = pd.DataFrame(sample_data)
-        # Return: distribution (visible), samples (visible), full results (visible), files, status
         return (
-            gr.update(value=distribution_df, visible=True),
             gr.update(value=sample_df, visible=True),
             gr.update(value=result, visible=True),
             [csv_path, pdf_path],
@@ -797,13 +819,8 @@ https://github.com/chrissoria/cat-llm
         with gr.Column():
             status = gr.Markdown("Ready to classify")
-            distribution_plot = gr.BarPlot(
-                x="Percentage",
-                y="Category",
-                title="Category Distribution (%)",
-                horizontal=True,
-                x_lim=[0, 100],
-                height=450,
                 visible=False
             )
             sample_results = gr.DataFrame(label="Sample Results (First 5 Rows)", visible=False)

 import time
 import sys
 from datetime import datetime
+import matplotlib.pyplot as plt
 # Import catllm
 try:
             python_version=python_version
         )
+        # Build distribution data and create matplotlib plot
         dist_data = []
         total_rows = len(result)
         for i, cat in enumerate(categories, 1):
                     "Category": cat,
                     "Percentage": round(pct, 1)
                 })
+        # Create matplotlib horizontal bar chart
+        fig, ax = plt.subplots(figsize=(10, max(4, len(dist_data) * 0.8)))
+        categories_list = [d["Category"] for d in dist_data]
+        percentages = [d["Percentage"] for d in dist_data]
+        # Reverse order so first category is at top
+        categories_list = categories_list[::-1]
+        percentages = percentages[::-1]
+        bars = ax.barh(categories_list, percentages, color='#2563eb')
+        ax.set_xlim(0, 100)
+        ax.set_xlabel('Percentage (%)', fontsize=11)
+        ax.set_title('Category Distribution (%)', fontsize=14, fontweight='bold')
+        # Add percentage labels on bars
+        for bar, pct in zip(bars, percentages):
+            ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
+                   f'{pct:.1f}%', va='center', fontsize=10)
+        plt.tight_layout()
+        distribution_fig = fig
         # Build sample results DataFrame (first 5 rows)
         sample_data = []
             })
         sample_df = pd.DataFrame(sample_data)
+        # Return: distribution plot (visible), samples (visible), full results (visible), files, status
         return (
+            gr.update(value=distribution_fig, visible=True),
             gr.update(value=sample_df, visible=True),
             gr.update(value=result, visible=True),
             [csv_path, pdf_path],
         with gr.Column():
             status = gr.Markdown("Ready to classify")
+            distribution_plot = gr.Plot(
+                label="Category Distribution (%)",
                 visible=False
             )
             sample_results = gr.DataFrame(label="Sample Results (First 5 Rows)", visible=False)

requirements.txt CHANGED Viewed

@@ -7,3 +7,4 @@ openpyxl
 requests
 regex
 reportlab

 requests
 regex
 reportlab
+matplotlib