chrissoria Claude committed on
Commit
f3a50f8
·
1 Parent(s): 046b57c

Switch to matplotlib for category distribution plot

Browse files

- Replace gr.BarPlot with gr.Plot using matplotlib
- Scale the plot height with the number of categories so it fills the available space
- Add percentage labels on bars
- Reverse the bar order so the first category appears at the top of the chart

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (3) hide show
  1. __pycache__/app.cpython-311.pyc +0 -0
  2. app.py +28 -11
  3. requirements.txt +1 -0
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
app.py CHANGED
@@ -9,6 +9,7 @@ import os
9
  import time
10
  import sys
11
  from datetime import datetime
 
12
 
13
  # Import catllm
14
  try:
@@ -561,7 +562,7 @@ Provide your work in JSON format where the number belonging to each category is
561
  python_version=python_version
562
  )
563
 
564
- # Build distribution summary DataFrame for bar plot (percentages)
565
  dist_data = []
566
  total_rows = len(result)
567
  for i, cat in enumerate(categories, 1):
@@ -573,7 +574,28 @@ Provide your work in JSON format where the number belonging to each category is
573
  "Category": cat,
574
  "Percentage": round(pct, 1)
575
  })
576
- distribution_df = pd.DataFrame(dist_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
 
578
  # Build sample results DataFrame (first 5 rows)
579
  sample_data = []
@@ -590,9 +612,9 @@ Provide your work in JSON format where the number belonging to each category is
590
  })
591
  sample_df = pd.DataFrame(sample_data)
592
 
593
- # Return: distribution (visible), samples (visible), full results (visible), files, status
594
  return (
595
- gr.update(value=distribution_df, visible=True),
596
  gr.update(value=sample_df, visible=True),
597
  gr.update(value=result, visible=True),
598
  [csv_path, pdf_path],
@@ -797,13 +819,8 @@ https://github.com/chrissoria/cat-llm
797
 
798
  with gr.Column():
799
  status = gr.Markdown("Ready to classify")
800
- distribution_plot = gr.BarPlot(
801
- x="Percentage",
802
- y="Category",
803
- title="Category Distribution (%)",
804
- horizontal=True,
805
- x_lim=[0, 100],
806
- height=450,
807
  visible=False
808
  )
809
  sample_results = gr.DataFrame(label="Sample Results (First 5 Rows)", visible=False)
 
9
  import time
10
  import sys
11
  from datetime import datetime
12
+ import matplotlib.pyplot as plt
13
 
14
  # Import catllm
15
  try:
 
562
  python_version=python_version
563
  )
564
 
565
+ # Build distribution data and create matplotlib plot
566
  dist_data = []
567
  total_rows = len(result)
568
  for i, cat in enumerate(categories, 1):
 
574
  "Category": cat,
575
  "Percentage": round(pct, 1)
576
  })
577
+
578
+ # Create matplotlib horizontal bar chart
579
+ fig, ax = plt.subplots(figsize=(10, max(4, len(dist_data) * 0.8)))
580
+ categories_list = [d["Category"] for d in dist_data]
581
+ percentages = [d["Percentage"] for d in dist_data]
582
+
583
+ # Reverse order so first category is at top
584
+ categories_list = categories_list[::-1]
585
+ percentages = percentages[::-1]
586
+
587
+ bars = ax.barh(categories_list, percentages, color='#2563eb')
588
+ ax.set_xlim(0, 100)
589
+ ax.set_xlabel('Percentage (%)', fontsize=11)
590
+ ax.set_title('Category Distribution (%)', fontsize=14, fontweight='bold')
591
+
592
+ # Add percentage labels on bars
593
+ for bar, pct in zip(bars, percentages):
594
+ ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
595
+ f'{pct:.1f}%', va='center', fontsize=10)
596
+
597
+ plt.tight_layout()
598
+ distribution_fig = fig
599
 
600
  # Build sample results DataFrame (first 5 rows)
601
  sample_data = []
 
612
  })
613
  sample_df = pd.DataFrame(sample_data)
614
 
615
+ # Return: distribution plot (visible), samples (visible), full results (visible), files, status
616
  return (
617
+ gr.update(value=distribution_fig, visible=True),
618
  gr.update(value=sample_df, visible=True),
619
  gr.update(value=result, visible=True),
620
  [csv_path, pdf_path],
 
819
 
820
  with gr.Column():
821
  status = gr.Markdown("Ready to classify")
822
+ distribution_plot = gr.Plot(
823
+ label="Category Distribution (%)",
 
 
 
 
 
824
  visible=False
825
  )
826
  sample_results = gr.DataFrame(label="Sample Results (First 5 Rows)", visible=False)
requirements.txt CHANGED
@@ -7,3 +7,4 @@ openpyxl
7
  requests
8
  regex
9
  reportlab
 
 
7
  requests
8
  regex
9
  reportlab
10
+ matplotlib