Spaces:
Running
Running
Commit
·
f3a50f8
1
Parent(s):
046b57c
Switch to matplotlib for category distribution plot
Browse files
- Replace gr.BarPlot with gr.Plot using matplotlib
- Plot now properly fills space with dynamic height based on categories
- Added percentage labels on bars
- First category appears at top of chart
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +28 -11
- requirements.txt +1 -0
__pycache__/app.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -9,6 +9,7 @@ import os
|
|
| 9 |
import time
|
| 10 |
import sys
|
| 11 |
from datetime import datetime
|
|
|
|
| 12 |
|
| 13 |
# Import catllm
|
| 14 |
try:
|
|
@@ -561,7 +562,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
| 561 |
python_version=python_version
|
| 562 |
)
|
| 563 |
|
| 564 |
-
# Build distribution
|
| 565 |
dist_data = []
|
| 566 |
total_rows = len(result)
|
| 567 |
for i, cat in enumerate(categories, 1):
|
|
@@ -573,7 +574,28 @@ Provide your work in JSON format where the number belonging to each category is
|
|
| 573 |
"Category": cat,
|
| 574 |
"Percentage": round(pct, 1)
|
| 575 |
})
|
| 576 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 577 |
|
| 578 |
# Build sample results DataFrame (first 5 rows)
|
| 579 |
sample_data = []
|
|
@@ -590,9 +612,9 @@ Provide your work in JSON format where the number belonging to each category is
|
|
| 590 |
})
|
| 591 |
sample_df = pd.DataFrame(sample_data)
|
| 592 |
|
| 593 |
-
# Return: distribution (visible), samples (visible), full results (visible), files, status
|
| 594 |
return (
|
| 595 |
-
gr.update(value=
|
| 596 |
gr.update(value=sample_df, visible=True),
|
| 597 |
gr.update(value=result, visible=True),
|
| 598 |
[csv_path, pdf_path],
|
|
@@ -797,13 +819,8 @@ https://github.com/chrissoria/cat-llm
|
|
| 797 |
|
| 798 |
with gr.Column():
|
| 799 |
status = gr.Markdown("Ready to classify")
|
| 800 |
-
distribution_plot = gr.
|
| 801 |
-
|
| 802 |
-
y="Category",
|
| 803 |
-
title="Category Distribution (%)",
|
| 804 |
-
horizontal=True,
|
| 805 |
-
x_lim=[0, 100],
|
| 806 |
-
height=450,
|
| 807 |
visible=False
|
| 808 |
)
|
| 809 |
sample_results = gr.DataFrame(label="Sample Results (First 5 Rows)", visible=False)
|
|
|
|
| 9 |
import time
|
| 10 |
import sys
|
| 11 |
from datetime import datetime
|
| 12 |
+
import matplotlib.pyplot as plt
|
| 13 |
|
| 14 |
# Import catllm
|
| 15 |
try:
|
|
|
|
| 562 |
python_version=python_version
|
| 563 |
)
|
| 564 |
|
| 565 |
+
# Build distribution data and create matplotlib plot
|
| 566 |
dist_data = []
|
| 567 |
total_rows = len(result)
|
| 568 |
for i, cat in enumerate(categories, 1):
|
|
|
|
| 574 |
"Category": cat,
|
| 575 |
"Percentage": round(pct, 1)
|
| 576 |
})
|
| 577 |
+
|
| 578 |
+
# Create matplotlib horizontal bar chart
|
| 579 |
+
fig, ax = plt.subplots(figsize=(10, max(4, len(dist_data) * 0.8)))
|
| 580 |
+
categories_list = [d["Category"] for d in dist_data]
|
| 581 |
+
percentages = [d["Percentage"] for d in dist_data]
|
| 582 |
+
|
| 583 |
+
# Reverse order so first category is at top
|
| 584 |
+
categories_list = categories_list[::-1]
|
| 585 |
+
percentages = percentages[::-1]
|
| 586 |
+
|
| 587 |
+
bars = ax.barh(categories_list, percentages, color='#2563eb')
|
| 588 |
+
ax.set_xlim(0, 100)
|
| 589 |
+
ax.set_xlabel('Percentage (%)', fontsize=11)
|
| 590 |
+
ax.set_title('Category Distribution (%)', fontsize=14, fontweight='bold')
|
| 591 |
+
|
| 592 |
+
# Add percentage labels on bars
|
| 593 |
+
for bar, pct in zip(bars, percentages):
|
| 594 |
+
ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
|
| 595 |
+
f'{pct:.1f}%', va='center', fontsize=10)
|
| 596 |
+
|
| 597 |
+
plt.tight_layout()
|
| 598 |
+
distribution_fig = fig
|
| 599 |
|
| 600 |
# Build sample results DataFrame (first 5 rows)
|
| 601 |
sample_data = []
|
|
|
|
| 612 |
})
|
| 613 |
sample_df = pd.DataFrame(sample_data)
|
| 614 |
|
| 615 |
+
# Return: distribution plot (visible), samples (visible), full results (visible), files, status
|
| 616 |
return (
|
| 617 |
+
gr.update(value=distribution_fig, visible=True),
|
| 618 |
gr.update(value=sample_df, visible=True),
|
| 619 |
gr.update(value=result, visible=True),
|
| 620 |
[csv_path, pdf_path],
|
|
|
|
| 819 |
|
| 820 |
with gr.Column():
|
| 821 |
status = gr.Markdown("Ready to classify")
|
| 822 |
+
distribution_plot = gr.Plot(
|
| 823 |
+
label="Category Distribution (%)",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 824 |
visible=False
|
| 825 |
)
|
| 826 |
sample_results = gr.DataFrame(label="Sample Results (First 5 Rows)", visible=False)
|
requirements.txt
CHANGED
|
@@ -7,3 +7,4 @@ openpyxl
|
|
| 7 |
requests
|
| 8 |
regex
|
| 9 |
reportlab
|
|
|
|
|
|
| 7 |
requests
|
| 8 |
regex
|
| 9 |
reportlab
|
| 10 |
+
matplotlib
|