File size: 1,295 Bytes
167fa39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Install the required packages before running this script:
#     pip install transformers torch PyPDF2 gradio
# (In a Jupyter/Colab notebook cell, run `!pip install transformers torch PyPDF2 gradio` instead.)

import gradio as gr
from transformers import pipeline
import PyPDF2

# Load the summarization pipeline once, at import time, so every call to
# summarize_pdf reuses the same facebook/bart-large-cnn pipeline object.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def pdf_to_text(pdf_file):
    """Extract text from a PDF file.

    Args:
        pdf_file: Path to the PDF on disk (``str``, ``bytes`` or any
            path-like object). An upload wrapper that exposes the on-disk
            path through a ``name`` attribute is also accepted.

    Returns:
        The text of every page, in page order, each page followed by a
        newline. A page with no extractable text contributes only the
        newline.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    path = pdf_file
    # Some Gradio versions pass the upload as a temp-file wrapper instead of a
    # path string. Real paths are excluded first because pathlib.Path also has
    # a ``name`` attribute (the basename), which must not replace the path.
    if not isinstance(pdf_file, (str, bytes)) and not hasattr(pdf_file, "__fspath__"):
        path = getattr(pdf_file, "name", pdf_file)

    with open(path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # ``or ""`` guards against a page yielding no string at all, and the
        # join avoids re-copying the accumulated text on every page.
        page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]
    return "".join(page_texts)

def summarize_pdf(pdf_file):
    """Summarize the content of a PDF file.

    Args:
        pdf_file: The uploaded PDF as handed over by the UI, or ``None``
            when no file was provided.

    Returns:
        The generated summary text, or a short explanatory message when
        there is no file or no extractable text to summarize.
    """
    # The UI callback can fire with no upload; answer with a hint rather than
    # failing while trying to open ``None``.
    if pdf_file is None:
        return "Please upload a PDF file."

    text = pdf_to_text(pdf_file)
    # pdf_to_text appends a newline per page, so a PDF without any text layer
    # yields whitespace rather than an empty string; strip before testing.
    if not text.strip():
        return "No text found in the PDF."

    # truncation=True clips input that exceeds the model's maximum input
    # length instead of letting the pipeline fail on long documents.
    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']

# Create a Gradio interface: a single file-upload input is passed to
# summarize_pdf, and the string it returns is shown in the "Summary" text box.
interface = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload a PDF file"),
    outputs=gr.Textbox(label="Summary"),
    title="PDF Summarizer",
    description="Upload a PDF file to receive a summary."
)

# Launch the interface only when this file is executed as a script, so that
# importing the module does not start the web server.
if __name__ == "__main__":
    interface.launch()