"""Streamlit front end for AlphaExtract.

Lets the user upload one or more PDFs in the sidebar, runs them through the
project ``Pipeline`` to obtain a combined summary, renders that summary on the
page and offers it as a downloadable PDF built with ReportLab.
"""

import tempfile
import time
from datetime import datetime
from io import BytesIO
from xml.sax.saxutils import escape

import requests
import streamlit as st
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer

from src.pipelines.pipeline import Pipeline

# Configure the page
st.set_page_config(
    page_title="AlphaExtract — Your AI-powered PDF Summarizer",
    page_icon="📈",
    layout="wide"
)

# Custom styling
# NOTE(review): the markup passed here is blank; presumably custom CSS was
# intended at this point -- confirm against the original file.
st.markdown(""" """, unsafe_allow_html=True)

# Initialize the pipeline
pipeline = Pipeline()


# Cache font download
@st.cache_resource(show_spinner=False)
def getDejaVuFontPath():
    """Download the DejaVu Sans TTF once and return the path of the local copy.

    Returns:
        Path of a temporary ``.ttf`` file holding the downloaded font. The file
        is created with ``delete=False``, so it outlives this call.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an error status. Raising here keeps a bad
            response body from being written to disk and cached as a font.
    """
    fontUrl = "https://github.com/senotrusov/dejavu-fonts-ttf/raw/refs/heads/master/ttf/DejaVuSans.ttf"
    # A timeout keeps a stalled connection from hanging the app indefinitely.
    response = requests.get(fontUrl, timeout=30)
    response.raise_for_status()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".ttf") as tempFontFile:
        tempFontFile.write(response.content)
    return tempFontFile.name


# Cache summary generation
@st.cache_data(show_spinner=False, ttl=3600)
def generateSummary(_pipeline, pdfBytesList):
    """Run the given pipeline over the uploaded PDF contents.

    Args:
        _pipeline: object exposing ``run(pdfBytesList)``. The leading
            underscore follows Streamlit's convention for arguments that are
            left out of the cache key.
        pdfBytesList: list with the raw bytes of each uploaded PDF.

    Returns:
        Whatever ``_pipeline.run`` returns; the caller treats it as the
        summary text (falsy meaning failure).
    """
    # Use the pipeline that was passed in rather than the module-level global.
    return _pipeline.run(pdfBytesList)


# Cache PDF generation
@st.cache_data(show_spinner=False, ttl=3600)
def generatePdfBytes(summary, fontPath):
    """Render the summary text into a single-column A4 PDF.

    Args:
        summary: summary text; newlines become line breaks in the PDF.
        fontPath: path of the TTF file to register as the "DejaVu" font.

    Returns:
        The bytes of the generated PDF document.
    """
    buffer = BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=A4,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72,
    )

    pdfmetrics.registerFont(TTFont("DejaVu", fontPath))

    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(
        name="summaryStyle",
        fontName="DejaVu",
        fontSize=12,
        leading=18,
        spaceAfter=12
    ))

    # Paragraph parses its text as XML-like markup: escape the raw summary so
    # characters such as "<" and "&" are printed literally, then turn newlines
    # into explicit <br/> tags (plain newlines are collapsed as whitespace).
    summaryMarkup = escape(summary).replace("\n", "<br/>")

    story = [
        Paragraph("PDF Summary", styles["Heading1"]),
        Spacer(1, 0.2 * inch),
        Paragraph(summaryMarkup, styles["summaryStyle"])
    ]
    doc.build(story)

    pdfValue = buffer.getvalue()
    buffer.close()
    return pdfValue


# Sidebar
with st.sidebar:
    st.markdown("## 📄 Upload PDFs")
    uploadedFiles = st.file_uploader(
        "Drop your PDFs here",
        type=["pdf"],
        accept_multiple_files=True
    )

    if uploadedFiles:
        st.markdown("### 🔍 Files Info")
        total_size = 0
        for file in uploadedFiles:
            # Sizes are reported in KB.
            file_size = len(file.getvalue()) / 1024
            total_size += file_size
            st.write(f"**📁 {file.name}**: {file_size:.2f} KB")
        st.write(f"**📦 Total Size**: {total_size:.2f} KB")
        st.write(f"**⏰ Uploaded**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    st.markdown("---")
    st.markdown(
        "\n\nInference by Groq\n"
        "using Meta's LLaMA 4 MOE Maverick\n"
        "for blazing-fast, high-precision summaries.\n\n",
        unsafe_allow_html=True
    )

# Main content
st.markdown("\n\nWelcome to AlphaExtract\n\n", unsafe_allow_html=True)
st.write("Upload your PDFs to instantly receive a professional-grade analytical summary.")

if uploadedFiles:
    # Placeholders are created up front so the status area stays above the summary.
    statusContainer = st.empty()
    summaryContainer = st.empty()

    with statusContainer.container():
        st.markdown("### ⏳ Processing Status")
        statusBox = st.empty()

    # Top-level UI boundary: any failure is reported in the status box
    # instead of surfacing as a raw traceback.
    try:
        startTime = time.time()

        statusBox.info("📘 Reading PDF files...")
        pdfBytesList = [file.getvalue() for file in uploadedFiles]
        readDuration = time.time() - startTime
        statusBox.success(f"✅ {len(uploadedFiles)} PDF files read successfully ({readDuration:.2f}s)")

        statusBox.info("🧠 Generating combined summary...")
        summary = generateSummary(pipeline, pdfBytesList)
        totalTime = time.time() - startTime

        if summary:
            statusBox.success(f"✅ Combined summary generated successfully (Total time: {totalTime:.2f}s)")

            with summaryContainer.container():
                st.markdown("\n\n📊 Generated Summary\n\n", unsafe_allow_html=True)
                st.markdown(summary)

                # PDF export is best-effort: a failure here must not hide the
                # summary that is already on screen.
                try:
                    fontPath = getDejaVuFontPath()
                    pdfBytesOut = generatePdfBytes(summary, fontPath)
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    st.download_button(
                        label="⬇️ Download Summary as PDF",
                        data=pdfBytesOut,
                        file_name=f"combined_summary_{timestamp}.pdf",
                        mime="application/pdf"
                    )
                except Exception as e:
                    st.error(f"❌ Error creating PDF: {str(e)}")
        else:
            statusBox.error("❌ Failed to generate summary. Please try again.")
    except Exception as e:
        statusBox.error(f"❌ Error processing PDFs: {str(e)}")
else:
    st.info("🚀 Please upload your PDF files using the sidebar to get started.")