Spaces:
Running
Running
Commit
·
a724e97
1
Parent(s):
cfd3f46
Rename codebook to methodology report
Browse files
- Rename function from generate_codebook_pdf to generate_methodology_report_pdf
- Update PDF title to "CatLLM Methodology Report"
- Update file suffix to _methodology_report.pdf
- Update UI label to "Download Results (CSV + Methodology Report)"
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +8 -8
__pycache__/app.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -60,17 +60,17 @@ def is_free_model(model, model_tier):
|
|
| 60 |
return model_tier == "Free Models"
|
| 61 |
|
| 62 |
|
| 63 |
-
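def generate_codebook_pdf(categories, model, column_name, num_rows, model_source, filename, success_rate,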
|
| 64 |
result_df=None, processing_time=None, prompt_template=None,
|
| 65 |
data_quality=None, catllm_version=None, python_version=None):
|
| 66 |
-
"""Generate a PDF
|
| 67 |
from reportlab.lib.pagesizes import letter
|
| 68 |
from reportlab.lib import colors
|
| 69 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 70 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
| 71 |
|
| 72 |
# Create temp file for PDF
|
| 73 |
-
pdf_file = tempfile.NamedTemporaryFile(mode='wb', suffix='
|
| 74 |
doc = SimpleDocTemplate(pdf_file.name, pagesize=letter)
|
| 75 |
styles = getSampleStyleSheet()
|
| 76 |
|
|
@@ -83,7 +83,7 @@ def generate_codebook_pdf(categories, model, column_name, num_rows, model_source
|
|
| 83 |
story = []
|
| 84 |
|
| 85 |
# === PAGE 1: Title, Category Mapping, Sample Results ===
|
| 86 |
-
story.append(Paragraph("CatLLM
|
| 87 |
story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", normal_style))
|
| 88 |
story.append(Spacer(1, 15))
|
| 89 |
|
|
@@ -516,7 +516,7 @@ def classify_data(spreadsheet_file, spreadsheet_column,
|
|
| 516 |
result.to_csv(f.name, index=False)
|
| 517 |
csv_path = f.name
|
| 518 |
|
| 519 |
-
# Get original filename for
|
| 520 |
original_filename = file_path.split("/")[-1]
|
| 521 |
|
| 522 |
# Calculate success rate
|
|
@@ -539,8 +539,8 @@ is the key and a 1 if the category is present and a 0 if not.'''
|
|
| 539 |
catllm_version = "unknown"
|
| 540 |
python_version = sys.version.split()[0]
|
| 541 |
|
| 542 |
-
# Generate PDF
|
| 543 |
-
pdf_path = generate_codebook_pdf(
|
| 544 |
categories=categories,
|
| 545 |
model=actual_model,
|
| 546 |
column_name=spreadsheet_column,
|
|
@@ -800,7 +800,7 @@ https://github.com/chrissoria/cat-llm
|
|
| 800 |
)
|
| 801 |
sample_results = gr.DataFrame(label="Sample Results (First 5 Rows)", visible=False)
|
| 802 |
results = gr.DataFrame(label="Full Classification Results", visible=False)
|
| 803 |
-
download_file = gr.File(label="Download Results (CSV +
|
| 804 |
code_output = gr.Code(
|
| 805 |
label="Python Code",
|
| 806 |
language="python",
|
|
|
|
| 60 |
return model_tier == "Free Models"
|
| 61 |
|
| 62 |
|
| 63 |
+
def generate_methodology_report_pdf(categories, model, column_name, num_rows, model_source, filename, success_rate,
|
| 64 |
result_df=None, processing_time=None, prompt_template=None,
|
| 65 |
data_quality=None, catllm_version=None, python_version=None):
|
| 66 |
+
"""Generate a PDF methodology report for reproducibility and transparency."""
|
| 67 |
from reportlab.lib.pagesizes import letter
|
| 68 |
from reportlab.lib import colors
|
| 69 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 70 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
| 71 |
|
| 72 |
# Create temp file for PDF
|
| 73 |
+
pdf_file = tempfile.NamedTemporaryFile(mode='wb', suffix='_methodology_report.pdf', delete=False)
|
| 74 |
doc = SimpleDocTemplate(pdf_file.name, pagesize=letter)
|
| 75 |
styles = getSampleStyleSheet()
|
| 76 |
|
|
|
|
| 83 |
story = []
|
| 84 |
|
| 85 |
# === PAGE 1: Title, Category Mapping, Sample Results ===
|
| 86 |
+
story.append(Paragraph("CatLLM Methodology Report", title_style))
|
| 87 |
story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", normal_style))
|
| 88 |
story.append(Spacer(1, 15))
|
| 89 |
|
|
|
|
| 516 |
result.to_csv(f.name, index=False)
|
| 517 |
csv_path = f.name
|
| 518 |
|
| 519 |
+
# Get original filename for methodology report
|
| 520 |
original_filename = file_path.split("/")[-1]
|
| 521 |
|
| 522 |
# Calculate success rate
|
|
|
|
| 539 |
catllm_version = "unknown"
|
| 540 |
python_version = sys.version.split()[0]
|
| 541 |
|
| 542 |
+
# Generate PDF methodology report with all new data
|
| 543 |
+
pdf_path = generate_methodology_report_pdf(
|
| 544 |
categories=categories,
|
| 545 |
model=actual_model,
|
| 546 |
column_name=spreadsheet_column,
|
|
|
|
| 800 |
)
|
| 801 |
sample_results = gr.DataFrame(label="Sample Results (First 5 Rows)", visible=False)
|
| 802 |
results = gr.DataFrame(label="Full Classification Results", visible=False)
|
| 803 |
+
download_file = gr.File(label="Download Results (CSV + Methodology Report)", file_count="multiple")
|
| 804 |
code_output = gr.Code(
|
| 805 |
label="Python Code",
|
| 806 |
language="python",
|