|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
from PyPDF2 import PdfReader |
|
|
|
|
|
|
|
|
# Build the summarization pipeline once at import time so that every request
# handled by the UI reuses the same loaded model instead of reloading it.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
|
|
|
|
|
# (max_length, min_length) generation bounds passed to the model for each
# choice offered in the UI.
_SUMMARY_LENGTHS = {
    "Short": (60, 20),
    "Medium": (130, 40),
    "Long": (200, 60),
}

# Maximum number of characters handed to the model in a single call.
_MAX_CHUNK_CHARS = 1000


def _extract_text(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, newline-joined."""
    reader = PdfReader(pdf_path)
    page_texts = (page.extract_text() for page in reader.pages)
    # Pages without extractable text (e.g. scanned images) yield nothing.
    return "\n".join(page_text for page_text in page_texts if page_text)


def _split_into_chunks(text, max_chars):
    """Split *text* on word boundaries into pieces of at most *max_chars* chars."""
    import textwrap

    # wrap() never cuts a word in half unless that single word is longer than
    # max_chars, and it normalises newlines/tabs to plain spaces.
    return textwrap.wrap(text, width=max_chars, break_on_hyphens=False)


def summarize_pdf(pdf_file, summary_length):
    """Summarize the text content of an uploaded PDF.

    Args:
        pdf_file: The upload to summarize. Either an object exposing the file
            path as ``.name`` or a plain path string; ``None`` means nothing
            was uploaded.
        summary_length: ``"Short"``, ``"Medium"`` or ``"Long"``. Any other
            value is treated as ``"Long"``.

    Returns:
        The per-chunk summaries joined by single spaces, or a human-readable
        message when no file was given or no text could be extracted.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    # Accept both file-like uploads (path in ``.name``) and bare path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    text = _extract_text(pdf_path)
    if not text.strip():
        return "No readable text found in this PDF."

    max_len, min_len = _SUMMARY_LENGTHS.get(
        summary_length, _SUMMARY_LENGTHS["Long"]
    )

    summaries = []
    for chunk in _split_into_chunks(text, _MAX_CHUNK_CHARS):
        # A chunk with no more words than the minimum summary length is
        # already as short as the requested output; forcing the model to
        # produce min_len tokens from it would only make it invent content.
        if len(chunk.split()) <= min_len:
            summaries.append(chunk)
            continue

        result = summarizer(
            chunk,
            max_length=max_len,
            min_length=min_len,
            do_sample=False,
            # Guard against token-dense chunks exceeding the model's input limit.
            truncation=True,
        )
        summaries.append(result[0]["summary_text"])

    return " ".join(summaries)
|
|
|
|
|
|
|
|
|
|
|
# Web UI: a PDF upload plus a length selector in, the generated summary out.
iface = gr.Interface(
    fn=summarize_pdf,
    inputs=[
        # Restrict the picker to PDFs so other file types are rejected up front.
        gr.File(label="Upload your PDF", file_types=[".pdf"]),
        gr.Radio(
            ["Short", "Medium", "Long"],
            label="Select Summary Length",
            value="Medium",
        ),
    ],
    outputs=gr.Textbox(label="Generated Summary", lines=10),
    # NOTE(review): the leading glyph of the title was mis-encoded in the
    # source; U+1F4C4 (PAGE FACING UP) is assumed here - confirm. It is
    # written as an escape so re-encoding the file cannot garble it again.
    title="\U0001F4C4 PDF Summarizer",
    description="Upload a PDF and choose summary length (Short / Medium / Long). Powered by Hugging Face transformers.",
)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse summarize_pdf) does not block on a running web server.
if __name__ == "__main__":
    iface.launch()
|
|
|