File size: 6,197 Bytes
14542aa
 
 
 
 
 
 
9aa78e6
 
 
 
585cfff
9bad00c
 
9aa78e6
 
 
a4cb6f8
 
9aa78e6
 
 
 
 
 
a4cb6f8
 
 
 
 
9aa78e6
a4cb6f8
 
 
 
 
 
9aa78e6
a4cb6f8
9aa78e6
 
a4cb6f8
 
 
 
 
9aa78e6
a4cb6f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9aa78e6
 
 
 
a4cb6f8
 
 
 
585cfff
14542aa
 
 
585cfff
 
 
 
 
a4cb6f8
878010f
50cbe29
 
9bad00c
a4cb6f8
878010f
14542aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bad00c
9aa78e6
 
50cbe29
 
 
 
 
 
 
 
 
 
 
 
 
a4cb6f8
 
cbb7483
 
 
 
 
 
 
a4cb6f8
9aa78e6
a4cb6f8
50cbe29
9aa78e6
50cbe29
9aa78e6
 
 
 
a4cb6f8
9aa78e6
 
 
 
50cbe29
 
9aa78e6
50cbe29
9aa78e6
50cbe29
 
9aa78e6
 
 
50cbe29
9aa78e6
 
a4cb6f8
9aa78e6
a4cb6f8
14542aa
 
 
 
 
 
a4cb6f8
14542aa
50cbe29
14542aa
 
 
a4cb6f8
9aa78e6
a4cb6f8
9aa78e6
50cbe29
9aa78e6
50cbe29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import inch

from src.pipelines.pipeline import Pipeline
from datetime import datetime
import streamlit as st
from io import BytesIO
import tempfile
import requests
import time

# Configure the page
# This is the first Streamlit call in the script. The title and icon literals
# were mis-decoded UTF-8 (an em dash and the "chart increasing" emoji); they
# are restored here so the browser tab shows the intended text and icon.
st.set_page_config(
    page_title="AlphaExtract — Your AI-powered PDF Summarizer",
    page_icon="📈",
    layout="wide",
)

# Custom styling
# Page-wide CSS: base font/background, the two header classes used by the
# HTML snippets further down, the download button look, and the sidebar
# background. Kept in a named constant so the injection call stays short.
_PAGE_CSS = """
    <style>
    html, body, [class*="css"]  {
        font-family: 'Segoe UI', sans-serif;
        background-color: #f9fbfd;
    }

    .main-header {
        font-size: 3rem;
        color: #0A66C2;
        font-weight: 700;
        text-align: center;
        margin-top: 1rem;
        margin-bottom: 2.5rem;
    }

    .summary-header {
        font-size: 1.8rem;
        color: #00695C;
        font-weight: 600;
        border-bottom: 2px solid #e0e0e0;
        padding-bottom: 0.5rem;
        margin-top: 2rem;
    }

    .stDownloadButton > button {
        background-color: #0A66C2;
        color: white;
        font-weight: bold;
        border-radius: 8px;
        padding: 0.6rem 1.2rem;
        margin-top: 1rem;
        transition: background-color 0.3s ease;
    }

    .stDownloadButton > button:hover {
        background-color: #084B8A;
    }

    .sidebar .sidebar-content {
        background-color: #ffffff;
    }
    </style>
"""

# unsafe_allow_html is required for the raw <style> element to be emitted.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Initialize the pipeline
# Module-level Pipeline instance (imported from src.pipelines.pipeline). It is
# the object passed to generateSummary() when uploaded files are processed.
pipeline = Pipeline()

# Cache font download
@st.cache_resource(show_spinner=False)
def getDejaVuFontPath():
    """Download DejaVuSans.ttf once and return the path of the local copy.

    The result is cached by ``st.cache_resource``, so the download and the
    temporary file are created only on the first successful call.

    Returns:
        str: Filesystem path of a temporary ``.ttf`` file holding the font.

    Raises:
        requests.RequestException: If the download times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    fontUrl = "https://github.com/senotrusov/dejavu-fonts-ttf/raw/refs/heads/master/ttf/DejaVuSans.ttf"

    # Bound the wait so a stalled connection cannot hang the app indefinitely.
    response = requests.get(fontUrl, timeout=30)
    # Fail loudly on 4xx/5xx; otherwise an error page would be written to disk
    # and cached as if it were the font.
    response.raise_for_status()

    # delete=False keeps the file on disk after the handle is closed, because
    # the caller later opens it by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".ttf") as tempFontFile:
        tempFontFile.write(response.content)
    return tempFontFile.name

# Cache summary generation
@st.cache_data(show_spinner=False, ttl=3600)
def generateSummary(_pipeline, pdfBytesList):
    """Run the summarisation pipeline over the uploaded PDFs.

    Results are cached for one hour (``ttl=3600``).

    Args:
        _pipeline: Object exposing ``run(pdfBytesList)``. The leading
            underscore excludes it from Streamlit's cache-key hashing.
        pdfBytesList: List with the raw bytes of each uploaded PDF.

    Returns:
        Whatever ``_pipeline.run`` returns for the given PDFs.
    """
    # Use the pipeline that was passed in, not the module-level global, so the
    # function honours its own parameter.
    return _pipeline.run(pdfBytesList)

# Cache PDF generation
@st.cache_data(show_spinner=False, ttl=3600)
def generatePdfBytes(summary, fontPath):
    """Render the summary text as an A4 PDF and return the document bytes.

    Results are cached for one hour (``ttl=3600``).

    Args:
        summary: Summary text. It is treated as literal text; newlines become
            line breaks in the PDF.
        fontPath: Path to the TrueType font file registered as "DejaVu".

    Returns:
        bytes: The generated PDF document.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    pdfmetrics.registerFont(TTFont("DejaVu", fontPath))

    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(
        name="summaryStyle",
        fontName="DejaVu",
        fontSize=12,
        leading=18,
        spaceAfter=12,
    ))

    # Paragraph parses its text as XML-like markup, so a literal '&', '<' or
    # '>' in the summary must be escaped first. Only afterwards are newlines
    # turned into <br/> tags, so those tags stay real markup.
    summaryMarkup = escape(summary).replace("\n", "<br/>")

    story = [
        Paragraph("PDF Summary", styles["Heading1"]),
        Spacer(1, 0.2 * inch),
        Paragraph(summaryMarkup, styles["summaryStyle"]),
    ]

    # The context manager closes the buffer even if the build raises.
    with BytesIO() as buffer:
        doc = SimpleDocTemplate(buffer, pagesize=A4,
                                rightMargin=72, leftMargin=72,
                                topMargin=72, bottomMargin=72)
        doc.build(story)
        return buffer.getvalue()

# Sidebar
# Holds the upload widget, a per-file size listing, and the model attribution.
# The emoji in the labels were mis-decoded UTF-8 and are restored here.
with st.sidebar:
    st.markdown("## 📄 Upload PDFs")
    uploadedFiles = st.file_uploader("Drop your PDFs here", type=["pdf"], accept_multiple_files=True)

    if uploadedFiles:
        st.markdown("### 🔍 Files Info")
        total_size = 0
        for file in uploadedFiles:
            # Size in KB, measured from the uploaded bytes themselves.
            file_size = len(file.getvalue()) / 1024
            total_size += file_size
            # NOTE(review): the original icon lost a byte in the encoding
            # damage; the folder emoji is the most likely intent. Confirm.
            st.write(f"**📁 {file.name}**: {file_size:.2f} KB")

        st.write(f"**📦 Total Size**: {total_size:.2f} KB")
        # NOTE: this is the time at which this line runs, not a timestamp
        # stored with the upload.
        st.write(f"**⏰ Uploaded**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    st.markdown("---")
    st.markdown("""
    <div style='font-size: 0.9rem; line-height: 1.4;'>
    Inference by <strong>Groq</strong><br>
    using Meta's <strong>LLaMA 4 MOE Maverick</strong><br>
    for blazing-fast, high-precision summaries.
    </div>
    """, unsafe_allow_html=True)

# Main content
# Flow: read the uploaded bytes, generate the combined summary, display it,
# then offer it as a PDF download. The emoji in the status and label strings
# were mis-decoded UTF-8 and are restored here.
st.markdown("<h1 class='main-header'>Welcome to <span style='color:#2E7D32'>AlphaExtract</span></h1>", unsafe_allow_html=True)
st.write("Upload your PDFs to instantly receive a professional-grade analytical summary.")

if uploadedFiles:
    # Placeholders are created up front so status always renders above the
    # summary, regardless of when each one is filled.
    statusContainer = st.empty()
    summaryContainer = st.empty()

    with statusContainer.container():
        st.markdown("### ⏳ Processing Status")
        # Single slot that each status message overwrites in turn.
        statusBox = st.empty()

        # Top-level UI boundary: any failure is shown to the user instead of
        # surfacing as a raw traceback.
        try:
            startTime = time.time()
            statusBox.info("📘 Reading PDF files...")
            pdfBytesList = [file.getvalue() for file in uploadedFiles]
            readDuration = time.time() - startTime
            statusBox.success(f"✅ {len(uploadedFiles)} PDF files read successfully ({readDuration:.2f}s)")

            statusBox.info("🧠 Generating combined summary...")
            summary = generateSummary(pipeline, pdfBytesList)
            # Measured from startTime, so it includes the read phase too.
            totalTime = time.time() - startTime

            if summary:
                statusBox.success(f"✅ Combined summary generated successfully (Total time: {totalTime:.2f}s)")

                with summaryContainer.container():
                    st.markdown("<h2 class='summary-header'>📊 Generated Summary</h2>", unsafe_allow_html=True)
                    st.markdown(summary)

                    # PDF export is handled separately so a failure here still
                    # leaves the on-screen summary visible.
                    try:
                        fontPath = getDejaVuFontPath()
                        pdfBytesOut = generatePdfBytes(summary, fontPath)
                        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

                        st.download_button(
                            label="⬇️ Download Summary as PDF",
                            data=pdfBytesOut,
                            file_name=f"combined_summary_{timestamp}.pdf",
                            mime="application/pdf"
                        )
                    except Exception as e:
                        st.error(f"❌ Error creating PDF: {e}")
            else:
                # Empty/falsy summary: the pipeline produced nothing usable.
                statusBox.error("❌ Failed to generate summary. Please try again.")
        except Exception as e:
            statusBox.error(f"❌ Error processing PDFs: {e}")
else:
    st.info("🚀 Please upload your PDF files using the sidebar to get started.")