Spaces:
Running
Running
Commit
·
7e9f122
1
Parent(s):
1505f12
Move Sample Results to its own page in methodology report
Browse files
PDF page structure now:
- Page 1: Category Mapping, Other Output Columns
- Page 2: Sample Results (First 5 Rows)
- Page 3: Category Distribution
- Page 4: Classification Summary
- Page 5: Prompt Template
- Page 6: Reproducibility Code
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +33 -33
__pycache__/app.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -82,7 +82,7 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
|
|
| 82 |
|
| 83 |
story = []
|
| 84 |
|
| 85 |
-
# === PAGE 1: Title, Category Mapping
|
| 86 |
story.append(Paragraph("CatLLM Methodology Report", title_style))
|
| 87 |
story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", normal_style))
|
| 88 |
story.append(Spacer(1, 15))
|
|
@@ -108,11 +108,33 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
|
|
| 108 |
story.append(cat_table)
|
| 109 |
story.append(Spacer(1, 15))
|
| 110 |
|
| 111 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
if result_df is not None and len(result_df) > 0:
|
| 113 |
-
story.append(
|
|
|
|
| 114 |
story.append(Paragraph("Example classifications showing original text and assigned categories:", normal_style))
|
| 115 |
-
story.append(Spacer(1,
|
| 116 |
|
| 117 |
sample_data = [["Original Text (truncated)", "Assigned Categories"]]
|
| 118 |
sample_df = result_df.head(5)
|
|
@@ -135,35 +157,13 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
|
|
| 135 |
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
|
| 136 |
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
|
| 137 |
('GRID', (0, 0), (-1, -1), 1, colors.black),
|
| 138 |
-
('PADDING', (0, 0), (-1, -1),
|
| 139 |
-
('FONTSIZE', (0, 0), (-1, -1),
|
| 140 |
('VALIGN', (0, 0), (-1, -1), 'TOP'),
|
| 141 |
]))
|
| 142 |
story.append(sample_table)
|
| 143 |
-
story.append(Spacer(1, 15))
|
| 144 |
-
|
| 145 |
-
# Other columns
|
| 146 |
-
story.append(Paragraph("Other Output Columns", heading_style))
|
| 147 |
-
other_cols = [
|
| 148 |
-
["Column Name", "Description"],
|
| 149 |
-
["survey_input", "The original text that was classified"],
|
| 150 |
-
["model_response", "Raw response from the LLM"],
|
| 151 |
-
["json", "Extracted JSON with category assignments"],
|
| 152 |
-
["processing_status", "'success' if classification worked, 'error' if failed"],
|
| 153 |
-
["categories_id", "Comma-separated list of assigned category numbers"],
|
| 154 |
-
]
|
| 155 |
-
other_table = Table(other_cols, colWidths=[120, 330])
|
| 156 |
-
other_table.setStyle(TableStyle([
|
| 157 |
-
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
|
| 158 |
-
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
|
| 159 |
-
('GRID', (0, 0), (-1, -1), 1, colors.black),
|
| 160 |
-
('PADDING', (0, 0), (-1, -1), 6),
|
| 161 |
-
('BACKGROUND', (0, 1), (0, -1), colors.lightgrey),
|
| 162 |
-
('FONTSIZE', (0, 0), (-1, -1), 9),
|
| 163 |
-
]))
|
| 164 |
-
story.append(other_table)
|
| 165 |
|
| 166 |
-
# === PAGE
|
| 167 |
story.append(PageBreak())
|
| 168 |
story.append(Paragraph("Category Distribution", title_style))
|
| 169 |
story.append(Paragraph("Count and percentage of responses assigned to each category:", normal_style))
|
|
@@ -195,14 +195,14 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
|
|
| 195 |
story.append(Spacer(1, 15))
|
| 196 |
story.append(Paragraph(f"<i>Note: Percentages may sum to more than 100% as responses can be assigned to multiple categories.</i>", normal_style))
|
| 197 |
|
| 198 |
-
# Citation on page
|
| 199 |
story.append(Spacer(1, 30))
|
| 200 |
story.append(Paragraph("Citation", heading_style))
|
| 201 |
story.append(Paragraph("If you use CatLLM in your research, please cite:", normal_style))
|
| 202 |
story.append(Spacer(1, 5))
|
| 203 |
story.append(Paragraph("Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. https://github.com/chrissoria/cat-llm", normal_style))
|
| 204 |
|
| 205 |
-
# === PAGE
|
| 206 |
story.append(PageBreak())
|
| 207 |
story.append(Paragraph("Classification Summary", title_style))
|
| 208 |
story.append(Spacer(1, 15))
|
|
@@ -286,7 +286,7 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
|
|
| 286 |
]))
|
| 287 |
story.append(version_table)
|
| 288 |
|
| 289 |
-
# === PAGE
|
| 290 |
story.append(PageBreak())
|
| 291 |
story.append(Paragraph("Prompt Template Used", title_style))
|
| 292 |
story.append(Paragraph("The following prompt template was sent to the LLM for each classification:", normal_style))
|
|
@@ -323,7 +323,7 @@ is the key and a 1 if the category is present and a 0 if not.'''
|
|
| 323 |
else:
|
| 324 |
story.append(Spacer(1, 5))
|
| 325 |
|
| 326 |
-
# === PAGE
|
| 327 |
story.append(PageBreak())
|
| 328 |
story.append(Paragraph("Reproducibility Code", title_style))
|
| 329 |
story.append(Paragraph("Use the following Python code to reproduce this classification:", normal_style))
|
|
|
|
| 82 |
|
| 83 |
story = []
|
| 84 |
|
| 85 |
+
# === PAGE 1: Title, Category Mapping ===
|
| 86 |
story.append(Paragraph("CatLLM Methodology Report", title_style))
|
| 87 |
story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", normal_style))
|
| 88 |
story.append(Spacer(1, 15))
|
|
|
|
| 108 |
story.append(cat_table)
|
| 109 |
story.append(Spacer(1, 15))
|
| 110 |
|
| 111 |
+
# Other columns
|
| 112 |
+
story.append(Paragraph("Other Output Columns", heading_style))
|
| 113 |
+
other_cols = [
|
| 114 |
+
["Column Name", "Description"],
|
| 115 |
+
["survey_input", "The original text that was classified"],
|
| 116 |
+
["model_response", "Raw response from the LLM"],
|
| 117 |
+
["json", "Extracted JSON with category assignments"],
|
| 118 |
+
["processing_status", "'success' if classification worked, 'error' if failed"],
|
| 119 |
+
["categories_id", "Comma-separated list of assigned category numbers"],
|
| 120 |
+
]
|
| 121 |
+
other_table = Table(other_cols, colWidths=[120, 330])
|
| 122 |
+
other_table.setStyle(TableStyle([
|
| 123 |
+
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
|
| 124 |
+
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
|
| 125 |
+
('GRID', (0, 0), (-1, -1), 1, colors.black),
|
| 126 |
+
('PADDING', (0, 0), (-1, -1), 6),
|
| 127 |
+
('BACKGROUND', (0, 1), (0, -1), colors.lightgrey),
|
| 128 |
+
('FONTSIZE', (0, 0), (-1, -1), 9),
|
| 129 |
+
]))
|
| 130 |
+
story.append(other_table)
|
| 131 |
+
|
| 132 |
+
# === PAGE 2: Sample Results ===
|
| 133 |
if result_df is not None and len(result_df) > 0:
|
| 134 |
+
story.append(PageBreak())
|
| 135 |
+
story.append(Paragraph("Sample Results (First 5 Rows)", title_style))
|
| 136 |
story.append(Paragraph("Example classifications showing original text and assigned categories:", normal_style))
|
| 137 |
+
story.append(Spacer(1, 15))
|
| 138 |
|
| 139 |
sample_data = [["Original Text (truncated)", "Assigned Categories"]]
|
| 140 |
sample_df = result_df.head(5)
|
|
|
|
| 157 |
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
|
| 158 |
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
|
| 159 |
('GRID', (0, 0), (-1, -1), 1, colors.black),
|
| 160 |
+
('PADDING', (0, 0), (-1, -1), 8),
|
| 161 |
+
('FONTSIZE', (0, 0), (-1, -1), 9),
|
| 162 |
('VALIGN', (0, 0), (-1, -1), 'TOP'),
|
| 163 |
]))
|
| 164 |
story.append(sample_table)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
+
# === PAGE 3: Category Distribution ===
|
| 167 |
story.append(PageBreak())
|
| 168 |
story.append(Paragraph("Category Distribution", title_style))
|
| 169 |
story.append(Paragraph("Count and percentage of responses assigned to each category:", normal_style))
|
|
|
|
| 195 |
story.append(Spacer(1, 15))
|
| 196 |
story.append(Paragraph(f"<i>Note: Percentages may sum to more than 100% as responses can be assigned to multiple categories.</i>", normal_style))
|
| 197 |
|
| 198 |
+
# Citation on page 3
|
| 199 |
story.append(Spacer(1, 30))
|
| 200 |
story.append(Paragraph("Citation", heading_style))
|
| 201 |
story.append(Paragraph("If you use CatLLM in your research, please cite:", normal_style))
|
| 202 |
story.append(Spacer(1, 5))
|
| 203 |
story.append(Paragraph("Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. https://github.com/chrissoria/cat-llm", normal_style))
|
| 204 |
|
| 205 |
+
# === PAGE 4: Classification Summary (Expanded) ===
|
| 206 |
story.append(PageBreak())
|
| 207 |
story.append(Paragraph("Classification Summary", title_style))
|
| 208 |
story.append(Spacer(1, 15))
|
|
|
|
| 286 |
]))
|
| 287 |
story.append(version_table)
|
| 288 |
|
| 289 |
+
# === PAGE 5: Prompt Template ===
|
| 290 |
story.append(PageBreak())
|
| 291 |
story.append(Paragraph("Prompt Template Used", title_style))
|
| 292 |
story.append(Paragraph("The following prompt template was sent to the LLM for each classification:", normal_style))
|
|
|
|
| 323 |
else:
|
| 324 |
story.append(Spacer(1, 5))
|
| 325 |
|
| 326 |
+
# === PAGE 6: Reproducibility Code ===
|
| 327 |
story.append(PageBreak())
|
| 328 |
story.append(Paragraph("Reproducibility Code", title_style))
|
| 329 |
story.append(Paragraph("Use the following Python code to reproduce this classification:", normal_style))
|