akazmi committed on
Commit
167fa39
·
verified ·
1 Parent(s): ce71446

Create pdf_summarizer.py

Browse files
Files changed (1) hide show
  1. pdf_summarizer.py +42 -0
pdf_summarizer.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Install the required packages before running this script
2
+ # In a notebook (Jupyter/Colab) you can uncomment the following line; in a plain Python script, run the same command from a shell without the leading "!"
3
+ # !pip install transformers torch PyPDF2 gradio
4
+
5
+ import gradio as gr
6
+ from transformers import pipeline
7
+ import PyPDF2
8
+
9
# Build the summarization pipeline once at import time so that every
# request handled by summarize_pdf reuses the same loaded model.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
11
+
12
def pdf_to_text(pdf_file):
    """Extract text from a PDF file.

    Args:
        pdf_file: Filesystem path of the PDF (``str``, ``bytes`` or
            ``os.PathLike``), or an object that exposes that path through a
            ``name`` attribute, such as an uploaded-file wrapper.

    Returns:
        The extracted text of every page, each page followed by a newline.
    """
    # Real paths go to open() unchanged. Anything else (e.g. a temp-file
    # wrapper handed over by the UI layer) is resolved through its .name.
    # Path-like objects are excluded explicitly because pathlib's .name is
    # only the final path component, not the full path.
    if isinstance(pdf_file, (str, bytes)) or hasattr(pdf_file, "__fspath__"):
        path = pdf_file
    else:
        path = getattr(pdf_file, "name", pdf_file)

    with open(path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ''` keeps a page without extractable text from breaking the
        # concatenation; join avoids repeated string re-allocation.
        return "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages
        )
20
+
21
def summarize_pdf(pdf_file):
    """Summarize the content of a PDF file.

    Args:
        pdf_file: The uploaded PDF, forwarded unchanged to ``pdf_to_text``,
            or ``None`` when nothing was uploaded.

    Returns:
        The summary text produced by the module-level ``summarizer``
        pipeline, or a short explanatory message when there is no file or
        no extractable text.
    """
    # Submitting the form without choosing a file delivers None.
    if pdf_file is None:
        return "No PDF file was uploaded."

    text = pdf_to_text(pdf_file)
    # pdf_to_text emits a newline per page, so a text-less PDF is
    # whitespace-only rather than empty; strip before testing.
    if not text.strip():
        return "No text found in the PDF."

    # truncation=True clips input that exceeds the model's maximum input
    # length instead of failing on long documents.
    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
30
+
31
# Wire the summarizer into a Gradio UI: one PDF upload in, one text box out.
interface = gr.Interface(
    summarize_pdf,
    gr.File(label="Upload a PDF file"),
    gr.Textbox(label="Summary"),
    title="PDF Summarizer",
    description="Upload a PDF file to receive a summary.",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()