Spaces:

CatLLM
/

survey-classifier

Running

chrissoria Claude committed 14 days ago

Commit

7e9f122

1 Parent(s): 1505f12

Move Sample Results to its own page in methodology report

PDF page structure now:
- Page 1: Category Mapping, Other Output Columns
- Page 2: Sample Results (First 5 Rows)
- Page 3: Category Distribution
- Page 4: Classification Summary
- Page 5: Prompt Template
- Page 6: Reproducibility Code

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show

__pycache__/app.cpython-311.pyc +0 -0
app.py +33 -33

__pycache__/app.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -82,7 +82,7 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
     story = []
-    # === PAGE 1: Title, Category Mapping, Sample Results ===
     story.append(Paragraph("CatLLM Methodology Report", title_style))
     story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", normal_style))
     story.append(Spacer(1, 15))
@@ -108,11 +108,33 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
     story.append(cat_table)
     story.append(Spacer(1, 15))
-    # Sample Results (first 5 rows)
     if result_df is not None and len(result_df) > 0:
-        story.append(Paragraph("Sample Results (First 5 Rows)", heading_style))
         story.append(Paragraph("Example classifications showing original text and assigned categories:", normal_style))
-        story.append(Spacer(1, 8))
         sample_data = [["Original Text (truncated)", "Assigned Categories"]]
         sample_df = result_df.head(5)
@@ -135,35 +157,13 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
             ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
             ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
             ('GRID', (0, 0), (-1, -1), 1, colors.black),
-            ('PADDING', (0, 0), (-1, -1), 6),
-            ('FONTSIZE', (0, 0), (-1, -1), 8),
             ('VALIGN', (0, 0), (-1, -1), 'TOP'),
         ]))
         story.append(sample_table)
-        story.append(Spacer(1, 15))
-    # Other columns
-    story.append(Paragraph("Other Output Columns", heading_style))
-    other_cols = [
-        ["Column Name", "Description"],
-        ["survey_input", "The original text that was classified"],
-        ["model_response", "Raw response from the LLM"],
-        ["json", "Extracted JSON with category assignments"],
-        ["processing_status", "'success' if classification worked, 'error' if failed"],
-        ["categories_id", "Comma-separated list of assigned category numbers"],
-    ]
-    other_table = Table(other_cols, colWidths=[120, 330])
-    other_table.setStyle(TableStyle([
-        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
-        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
-        ('GRID', (0, 0), (-1, -1), 1, colors.black),
-        ('PADDING', (0, 0), (-1, -1), 6),
-        ('BACKGROUND', (0, 1), (0, -1), colors.lightgrey),
-        ('FONTSIZE', (0, 0), (-1, -1), 9),
-    ]))
-    story.append(other_table)
-    # === PAGE 2: Category Distribution ===
     story.append(PageBreak())
     story.append(Paragraph("Category Distribution", title_style))
     story.append(Paragraph("Count and percentage of responses assigned to each category:", normal_style))
@@ -195,14 +195,14 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
         story.append(Spacer(1, 15))
         story.append(Paragraph(f"<i>Note: Percentages may sum to more than 100% as responses can be assigned to multiple categories.</i>", normal_style))
-    # Citation on page 2
     story.append(Spacer(1, 30))
     story.append(Paragraph("Citation", heading_style))
     story.append(Paragraph("If you use CatLLM in your research, please cite:", normal_style))
     story.append(Spacer(1, 5))
     story.append(Paragraph("Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. https://github.com/chrissoria/cat-llm", normal_style))
-    # === PAGE 3: Classification Summary (Expanded) ===
     story.append(PageBreak())
     story.append(Paragraph("Classification Summary", title_style))
     story.append(Spacer(1, 15))
@@ -286,7 +286,7 @@ def generate_methodology_report_pdf(categories, model, column_name, num_rows, mo
     ]))
     story.append(version_table)
-    # === PAGE 4: Prompt Template ===
     story.append(PageBreak())
     story.append(Paragraph("Prompt Template Used", title_style))
     story.append(Paragraph("The following prompt template was sent to the LLM for each classification:", normal_style))
@@ -323,7 +323,7 @@ is the key and a 1 if the category is present and a 0 if not.'''
             else:
                 story.append(Spacer(1, 5))
-    # === PAGE 5: Reproducibility Code ===
     story.append(PageBreak())
     story.append(Paragraph("Reproducibility Code", title_style))
     story.append(Paragraph("Use the following Python code to reproduce this classification:", normal_style))

     story = []
+    # === PAGE 1: Title, Category Mapping ===
     story.append(Paragraph("CatLLM Methodology Report", title_style))
     story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", normal_style))
     story.append(Spacer(1, 15))
     story.append(cat_table)
     story.append(Spacer(1, 15))
+    # Other columns
+    story.append(Paragraph("Other Output Columns", heading_style))
+    other_cols = [
+        ["Column Name", "Description"],
+        ["survey_input", "The original text that was classified"],
+        ["model_response", "Raw response from the LLM"],
+        ["json", "Extracted JSON with category assignments"],
+        ["processing_status", "'success' if classification worked, 'error' if failed"],
+        ["categories_id", "Comma-separated list of assigned category numbers"],
+    ]
+    other_table = Table(other_cols, colWidths=[120, 330])
+    other_table.setStyle(TableStyle([
+        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
+        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
+        ('GRID', (0, 0), (-1, -1), 1, colors.black),
+        ('PADDING', (0, 0), (-1, -1), 6),
+        ('BACKGROUND', (0, 1), (0, -1), colors.lightgrey),
+        ('FONTSIZE', (0, 0), (-1, -1), 9),
+    ]))
+    story.append(other_table)
+    # === PAGE 2: Sample Results ===
     if result_df is not None and len(result_df) > 0:
+        story.append(PageBreak())
+        story.append(Paragraph("Sample Results (First 5 Rows)", title_style))
         story.append(Paragraph("Example classifications showing original text and assigned categories:", normal_style))
+        story.append(Spacer(1, 15))
         sample_data = [["Original Text (truncated)", "Assigned Categories"]]
         sample_df = result_df.head(5)
             ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
             ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
             ('GRID', (0, 0), (-1, -1), 1, colors.black),
+            ('PADDING', (0, 0), (-1, -1), 8),
+            ('FONTSIZE', (0, 0), (-1, -1), 9),
             ('VALIGN', (0, 0), (-1, -1), 'TOP'),
         ]))
         story.append(sample_table)
+    # === PAGE 3: Category Distribution ===
     story.append(PageBreak())
     story.append(Paragraph("Category Distribution", title_style))
     story.append(Paragraph("Count and percentage of responses assigned to each category:", normal_style))
         story.append(Spacer(1, 15))
         story.append(Paragraph(f"<i>Note: Percentages may sum to more than 100% as responses can be assigned to multiple categories.</i>", normal_style))
+    # Citation on page 3
     story.append(Spacer(1, 30))
     story.append(Paragraph("Citation", heading_style))
     story.append(Paragraph("If you use CatLLM in your research, please cite:", normal_style))
     story.append(Spacer(1, 5))
     story.append(Paragraph("Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. https://github.com/chrissoria/cat-llm", normal_style))
+    # === PAGE 4: Classification Summary (Expanded) ===
     story.append(PageBreak())
     story.append(Paragraph("Classification Summary", title_style))
     story.append(Spacer(1, 15))
     ]))
     story.append(version_table)
+    # === PAGE 5: Prompt Template ===
     story.append(PageBreak())
     story.append(Paragraph("Prompt Template Used", title_style))
     story.append(Paragraph("The following prompt template was sent to the LLM for each classification:", normal_style))
             else:
                 story.append(Spacer(1, 5))
+    # === PAGE 6: Reproducibility Code ===
     story.append(PageBreak())
     story.append(Paragraph("Reproducibility Code", title_style))
     story.append(Paragraph("Use the following Python code to reproduce this classification:", normal_style))