import json
import logging
from io import BytesIO
from xml.sax.saxutils import escape

import plotly.graph_objects as go
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import (
    Image,
    Paragraph,
    Preformatted,
    SimpleDocTemplate,
    Spacer,
    Table,
    TableStyle,
)

logger = logging.getLogger(__name__)

# (payload key, section heading, confusion-matrix colorscale) per model,
# listed in the order the sections are rendered.
_MODEL_SECTIONS = (
    ("base", "Base Bert Ensemble Results", "Blues"),
    ("enhanced", "Enhanced Bert Ensemble Results", "Greens"),
)


def _markup_safe(value):
    """Return ``str(value)`` with ``&``, ``<`` and ``>`` escaped.

    ``Paragraph`` interprets its text as XML-like markup, so free-form
    payload values must be escaped before being interpolated into it.
    """
    return escape(str(value))


def _figure_to_image(figure, width=640, height=300):
    """Convert a Plotly figure to a reportlab Image.

    Args:
        figure: Plotly figure to rasterise.
        width: Export width passed to ``figure.to_image``.
        height: Export height passed to ``figure.to_image``.

    Returns:
        A reportlab ``Image`` sized at 0.75x the requested dimensions, or
        ``None`` if conversion fails. Failure is logged rather than raised so
        that a missing chart never aborts the whole PDF.
    """
    try:
        image_bytes = figure.to_image(
            format="png", width=width, height=height, scale=2
        )
        # 0.75 presumably converts export pixels to PDF points (72/96).
        return Image(BytesIO(image_bytes), width=width * 0.75, height=height * 0.75)
    except Exception:
        # Best-effort: static export depends on an optional image backend.
        logger.warning(
            "Could not render Plotly figure for the PDF; skipping chart.",
            exc_info=True,
        )
        return None


def _metrics_table(rows):
    """Build a left-aligned, gridded reportlab Table from ``rows``.

    The first row is styled as a header (shaded background, bold font).
    """
    table = Table(rows, hAlign="LEFT")
    table.setStyle(
        TableStyle(
            [
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#f3f4f6")),
                ("TEXTCOLOR", (0, 0), (-1, 0), colors.HexColor("#111827")),
                ("ALIGN", (0, 0), (-1, -1), "LEFT"),
                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                ("FONTSIZE", (0, 0), (-1, -1), 9),
                ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#d1d5db")),
                ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
            ]
        )
    )
    return table


def _single_model_section(story, model_name, model_payload, styles):
    """Append one model's single-text results to ``story``.

    Reads ``prediction``, ``confidence``, ``probabilities`` (indexed 0 and 1)
    and ``processing_time`` from ``model_payload``; ``token_importance`` and
    ``technical_details`` are optional. ``story`` is mutated in place.
    """
    is_hate = model_payload["prediction"] == 1
    prediction = "HATE SPEECH DETECTED" if is_hate else "NOT HATE SPEECH"
    prediction_color = "#c62828" if is_hate else "#2e7d32"

    story.append(Paragraph(f"{model_name}", styles["Heading3"]))
    # The verdict colour was computed but never applied; render it via
    # Paragraph font markup so the red/green cue actually shows up.
    story.append(
        Paragraph(
            f'<font color="{prediction_color}">{prediction}</font>',
            styles["Normal"],
        )
    )
    story.append(Spacer(1, 6))

    probabilities = model_payload["probabilities"]
    rows = [
        ["Metric", "Value"],
        ["Confidence", f"{model_payload['confidence']:.1%}"],
        ["Not Hate Speech", f"{probabilities[0]:.1%}"],
        ["Hate Speech", f"{probabilities[1]:.1%}"],
        ["Processing Time", f"{model_payload['processing_time']:.3f}s"],
    ]
    story.append(_metrics_table(rows))
    story.append(Spacer(1, 8))

    probability_chart = go.Figure(
        data=[
            go.Bar(
                x=["Not Hate Speech", "Hate Speech"],
                y=probabilities,
                marker_color=["#66bb6a", "#ef5350"],
                text=[f"{value:.1%}" for value in probabilities],
                textposition="auto",
            )
        ]
    )
    probability_chart.update_layout(
        yaxis_range=[0, 1],
        height=300,
        showlegend=False,
        margin=dict(l=20, r=20, t=20, b=20),
    )
    chart_image = _figure_to_image(probability_chart)
    if chart_image is not None:
        story.append(Paragraph("Probability Distribution", styles["Italic"]))
        story.append(chart_image)
        story.append(Spacer(1, 8))

    token_rows = model_payload.get("token_importance") or []
    if token_rows:
        story.append(Paragraph("Top Important Tokens", styles["Italic"]))
        token_table_rows = [["Token", "Importance"]] + [
            [token_data["Token"], f"{token_data['Importance']:.4f}"]
            for token_data in token_rows
        ]
        story.append(_metrics_table(token_table_rows))
        story.append(Spacer(1, 8))

    technical_details = model_payload.get("technical_details")
    if technical_details:
        story.append(Paragraph("Technical Details", styles["Italic"]))
        # default=str: this dump is display-only, so a value json cannot
        # encode should be shown as text instead of aborting the export.
        details_text = json.dumps(technical_details, indent=2, default=str)
        story.append(Preformatted(details_text, styles["Code"]))
        story.append(Spacer(1, 12))


def _batch_model_section(story, model_name, model_payload, styles, colorscale):
    """Append one model's batch-evaluation results to ``story``.

    Reads the score, CPU, memory and runtime keys listed in the table below
    from ``model_payload``; ``confusion_matrix`` is optional and, when
    present, is drawn as a heatmap using ``colorscale``. ``story`` is mutated
    in place.
    """
    story.append(Paragraph(f"{model_name}", styles["Heading3"]))
    story.append(Spacer(1, 4))

    rows = [
        ["Metric", "Value"],
        ["F1 Score", f"{model_payload['f1_score']:.4f}"],
        ["Precision", f"{model_payload['precision']:.4f}"],
        ["Accuracy", f"{model_payload['accuracy']:.4f}"],
        ["Recall", f"{model_payload['recall']:.4f}"],
        ["Avg CPU", f"{model_payload['cpu_usage']:.2f}%"],
        ["Peak CPU", f"{model_payload['peak_cpu_usage']:.2f}%"],
        ["Avg Memory", f"{model_payload['memory_usage']:.2f} MB"],
        ["Peak Memory", f"{model_payload['peak_memory_usage']:.2f} MB"],
        ["Total Runtime", f"{model_payload['runtime']:.2f}s"],
        ["Avg Time/Sample", f"{model_payload['avg_time_per_sample']:.3f}s"],
    ]
    story.append(_metrics_table(rows))
    story.append(Spacer(1, 8))

    confusion_matrix = model_payload.get("confusion_matrix")
    if confusion_matrix is None:
        return

    confusion_fig = go.Figure(
        data=go.Heatmap(
            z=confusion_matrix,
            x=["Pred Not Hate", "Pred Hate"],
            y=["True Not Hate", "True Hate"],
            colorscale=colorscale,
            text=confusion_matrix,
            texttemplate="%{text}",
            textfont={"size": 14},
            showscale=False,
        )
    )
    confusion_fig.update_layout(height=300, margin=dict(l=20, r=20, t=20, b=20))
    confusion_image = _figure_to_image(confusion_fig)
    if confusion_image is not None:
        story.append(Paragraph("Confusion Matrix", styles["Italic"]))
        story.append(confusion_image)
        story.append(Spacer(1, 12))


def generate_results_pdf(payload):
    """Generate a PDF from the current Streamlit results payload.

    Args:
        payload: Mapping with a ``mode`` of ``"single"`` (the default) or
            ``"batch"`` and a ``models`` mapping that may contain ``"base"``
            and/or ``"enhanced"`` entries. Single mode also reads
            ``input_text`` and ``rationale``; batch mode reads ``filename``
            and ``rows``.

    Returns:
        The rendered PDF document as bytes.
    """
    output_buffer = BytesIO()
    document = SimpleDocTemplate(
        output_buffer,
        pagesize=letter,
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
        title="Hate Speech Detection Results",
    )
    styles = getSampleStyleSheet()
    story = [
        Paragraph("Hate Speech Detection Results", styles["Title"]),
        Spacer(1, 8),
    ]

    mode = payload.get("mode", "single")
    # ``or {}`` also covers an explicit ``"models": None`` in the payload.
    models = payload.get("models") or {}

    if mode == "single":
        story.append(Paragraph("Single Text Analysis", styles["Heading2"]))
        story.append(Spacer(1, 6))
        # Free-form text is escaped: Paragraph would otherwise treat any
        # '<' or '&' in it as markup.
        input_text = _markup_safe(payload.get("input_text", ""))
        story.append(Paragraph(f"Input Text: {input_text}", styles["Normal"]))
        rationale_text = payload.get("rationale")
        if rationale_text:
            story.append(Spacer(1, 4))
            story.append(
                Paragraph(
                    f"Rationale: {_markup_safe(rationale_text)}",
                    styles["Normal"],
                )
            )
        story.append(Spacer(1, 10))
        for key, heading, _colorscale in _MODEL_SECTIONS:
            if key in models:
                _single_model_section(story, heading, models[key], styles)
    elif mode == "batch":
        story.append(Paragraph("Batch File Analysis", styles["Heading2"]))
        story.append(Spacer(1, 6))
        filename = _markup_safe(payload.get("filename", "Unknown"))
        row_count = _markup_safe(payload.get("rows", 0))
        story.append(Paragraph(f"Filename: {filename}", styles["Normal"]))
        story.append(Paragraph(f"Rows: {row_count}", styles["Normal"]))
        story.append(Spacer(1, 10))
        for key, heading, colorscale in _MODEL_SECTIONS:
            if key in models:
                _batch_model_section(
                    story, heading, models[key], styles, colorscale
                )
    else:
        story.append(Paragraph("No exportable results found.", styles["Normal"]))

    document.build(story)
    return output_buffer.getvalue()