Srikesh committed on
Commit
3669fa7
·
verified ·
1 Parent(s): e82253e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -12
app.py CHANGED
@@ -5,11 +5,11 @@ from PyPDF2 import PdfReader
5
  # Load summarization model
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
 
8
- # Function to read and summarize PDF
9
- def summarize_pdf(pdf_file):
10
  if pdf_file is None:
11
  return "Please upload a PDF file."
12
 
 
13
  reader = PdfReader(pdf_file.name)
14
  text = ""
15
  for page in reader.pages:
@@ -17,27 +17,49 @@ def summarize_pdf(pdf_file):
17
  if page_text:
18
  text += page_text + "\n"
19
 
20
- # Chunk the text
 
 
 
 
 
 
 
 
 
 
 
21
  max_chunk = 1000
22
- chunks = [text[i:i+max_chunk] for i in range(0, len(text), max_chunk)]
23
 
24
- # Summarize each chunk
25
  summaries = []
26
  for chunk in chunks:
27
- summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
28
- summaries.append(summary[0]['summary_text'])
 
 
 
 
 
29
 
30
  final_summary = " ".join(summaries)
31
  return final_summary
32
 
33
 
34
- # Gradio UI
35
  iface = gr.Interface(
36
  fn=summarize_pdf,
37
- inputs=gr.File(label="Upload a PDF"),
38
- outputs=gr.Textbox(label="Summary"),
39
- title="PDF Summarizer",
40
- description="Upload a PDF to get a summarized version of its content using Hugging Face transformers."
 
 
 
 
 
 
 
41
  )
42
 
43
  iface.launch()
 
5
  # Load summarization model
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
 
8
+ def summarize_pdf(pdf_file, summary_length):
 
9
  if pdf_file is None:
10
  return "Please upload a PDF file."
11
 
12
+ # Read PDF text
13
  reader = PdfReader(pdf_file.name)
14
  text = ""
15
  for page in reader.pages:
 
17
  if page_text:
18
  text += page_text + "\n"
19
 
20
+ if not text.strip():
21
+ return "No readable text found in this PDF."
22
+
23
+ # Define summary length settings
24
+ if summary_length == "Short":
25
+ max_len, min_len = 60, 20
26
+ elif summary_length == "Medium":
27
+ max_len, min_len = 130, 40
28
+ else: # Long
29
+ max_len, min_len = 200, 60
30
+
31
+ # Split text into manageable chunks
32
  max_chunk = 1000
33
+ chunks = [text[i:i + max_chunk] for i in range(0, len(text), max_chunk)]
34
 
 
35
  summaries = []
36
  for chunk in chunks:
37
+ summary = summarizer(
38
+ chunk,
39
+ max_length=max_len,
40
+ min_length=min_len,
41
+ do_sample=False
42
+ )[0]["summary_text"]
43
+ summaries.append(summary)
44
 
45
  final_summary = " ".join(summaries)
46
  return final_summary
47
 
48
 
49
+ # Gradio Interface
50
  iface = gr.Interface(
51
  fn=summarize_pdf,
52
+ inputs=[
53
+ gr.File(label="Upload your PDF"),
54
+ gr.Radio(
55
+ ["Short", "Medium", "Long"],
56
+ label="Select Summary Length",
57
+ value="Medium"
58
+ )
59
+ ],
60
+ outputs=gr.Textbox(label="Generated Summary", lines=10),
61
+ title="📘 PDF Summarizer",
62
+ description="Upload a PDF and choose summary length (Short / Medium / Long). Powered by Hugging Face transformers."
63
  )
64
 
65
  iface.launch()