pdfreaderandsummarizer / pdf_summarizer.py
akazmi's picture
Create pdf_summarizer.py
167fa39 verified
# Install the required packages before running this script:
#     pip install transformers torch PyPDF2 gradio
# (In a Jupyter/Colab notebook cell, prefix the command with "!" instead.)
import gradio as gr
from transformers import pipeline
import PyPDF2
# Build the summarization pipeline once, at import time, so that every
# request handled by this module reuses the same loaded model.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
def pdf_to_text(pdf_file):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_file: Path to the PDF (``str``, ``bytes`` or any ``os.PathLike``),
            or an object whose ``name`` attribute holds the path, such as
            the temporary-file wrapper some Gradio versions pass for
            ``gr.File`` uploads.

    Returns:
        The text of all pages in order, each page followed by a newline.
        A page that yields no text contributes only its newline.
    """
    # Real paths are used as-is (a pathlib.Path also has `.name`, but that is
    # only the basename); anything else is treated as an upload wrapper whose
    # `.name` is the on-disk path, falling back to the object itself.
    is_path = isinstance(pdf_file, (str, bytes)) or hasattr(pdf_file, "__fspath__")
    path = pdf_file if is_path else getattr(pdf_file, "name", pdf_file)

    with open(path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` keeps concatenation safe should a page yield None instead of
        # a string; join avoids repeated string re-allocation on large PDFs.
        return "".join(
            (page.extract_text() or "") + "\n" for page in reader.pages
        )
def summarize_pdf(pdf_file):
    """Summarize the content of a PDF file.

    Args:
        pdf_file: The uploaded PDF as handed over by the file input, or
            ``None`` when nothing was uploaded.

    Returns:
        The generated summary text, or a short explanatory message when no
        file was provided or the PDF contains no extractable text.
    """
    # A submission without an upload is expected to arrive as None; answer
    # with a message instead of failing inside open().
    if pdf_file is None:
        return "Please upload a PDF file."

    text = pdf_to_text(pdf_file)
    # pdf_to_text appends a newline per page, so a PDF without extractable
    # text is whitespace-only rather than empty; strip before testing.
    if not text.strip():
        return "No text found in the PDF."

    # truncation=True clips the input to the model's maximum input length
    # instead of failing on long documents; in that case only the leading
    # part of the document is summarized.
    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
# Create the Gradio interface: one PDF upload in, one text summary out.
interface = gr.Interface(
    fn=summarize_pdf,
    # Restrict the file picker to PDFs, the only format the handler parses.
    inputs=gr.File(label="Upload a PDF file", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Summary"),
    title="PDF Summarizer",
    description="Upload a PDF file to receive a summary.",
)

# Start the web UI only when this file is run as a script, so importing the
# module (e.g. to reuse summarize_pdf) does not launch a server.
if __name__ == "__main__":
    interface.launch()