aspendse committed on
Commit
d9fdae3
·
verified ·
1 Parent(s): 0d08870

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -42
app.py CHANGED
@@ -2,63 +2,47 @@ import gradio as gr
2
  from transformers import pipeline
3
  import PyPDF2
4
  import docx
 
5
 
6
- # Load the summarization pipeline
7
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
 
9
- def read_pdf(file):
10
- try:
11
- reader = PyPDF2.PdfReader(file.name)
12
- text = ""
13
- for page in reader.pages:
14
- text += page.extract_text() or ""
15
- return text
16
- except Exception:
17
- return "Failed to read PDF."
18
 
19
- def read_docx(file):
20
- try:
21
- doc = docx.Document(file.name)
22
- return "\n".join([para.text for para in doc.paragraphs])
23
- except Exception:
24
- return "Failed to read Word document."
25
 
26
- def summarize_file(pdf_file, docx_file, text_input):
27
  if pdf_file:
28
- text = read_pdf(pdf_file)
29
  elif docx_file:
30
- text = read_docx(docx_file)
31
  elif text_input:
32
  text = text_input
33
  else:
34
- return "Please upload a file or enter some text."
35
 
36
  if not text.strip():
37
- return "No valid text found to summarize."
38
 
39
- if len(text) > 3000:
40
- text = text[:3000]
 
41
 
42
- summary = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
43
- return summary
44
 
45
  with gr.Blocks() as demo:
46
- gr.Markdown("## 🧠 Smart Text Summarizer")
47
-
48
  with gr.Row():
49
- pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
50
- docx_input = gr.File(label="Upload Word File", file_types=[".docx"])
51
-
52
- text_input = gr.Textbox(lines=8, label="Or Paste Text Here")
53
-
54
- summarize_button = gr.Button("Summarize")
55
-
56
- output = gr.Textbox(lines=8, label="Summary")
57
-
58
- summarize_button.click(
59
- summarize_file,
60
- inputs=[pdf_input, docx_input, text_input],
61
- outputs=output
62
- )
63
 
64
  demo.launch()
 
2
  from transformers import pipeline
3
  import PyPDF2
4
  import docx
5
+ import os
6
 
7
+ # Load a strong summarization model
8
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
9
 
10
+ def extract_text_from_pdf(file):
11
+ reader = PyPDF2.PdfReader(file.name)
12
+ return "\n".join(page.extract_text() or "" for page in reader.pages)
 
 
 
 
 
 
13
 
14
+ def extract_text_from_docx(file):
15
+ doc = docx.Document(file.name)
16
+ return "\n".join(para.text for para in doc.paragraphs)
 
 
 
17
 
18
+ def summarize_input(pdf_file, docx_file, text_input):
19
  if pdf_file:
20
+ text = extract_text_from_pdf(pdf_file)
21
  elif docx_file:
22
+ text = extract_text_from_docx(docx_file)
23
  elif text_input:
24
  text = text_input
25
  else:
26
+ return "Please upload a file or paste text."
27
 
28
  if not text.strip():
29
+ return " Input could not be read or is empty."
30
 
31
+ # Limit to 2000 chars to avoid model truncation
32
+ if len(text) > 2000:
33
+ text = text[:2000]
34
 
35
+ summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
36
+ return summary[0]["summary_text"]
37
 
38
  with gr.Blocks() as demo:
39
+ gr.Markdown("## 🧠 Smart Summarizer")
 
40
  with gr.Row():
41
+ pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"])
42
+ docx_in = gr.File(label="Upload Word (.docx)", file_types=[".docx"])
43
+ txt_in = gr.Textbox(lines=8, label="Or Paste Text")
44
+ btn = gr.Button("Summarize")
45
+ out = gr.Textbox(lines=8, label="Summary")
46
+ btn.click(fn=summarize_input, inputs=[pdf_in, docx_in, txt_in], outputs=out)
 
 
 
 
 
 
 
 
47
 
48
  demo.launch()