aspendse committed on
Commit
d6464e5
·
verified ·
1 Parent(s): 4ae9d44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -40
app.py CHANGED
@@ -1,45 +1,64 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import docx2txt
4
  import PyPDF2
 
5
 
6
- # Load the summarizer model
7
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
8
-
9
- # Function to extract text from supported file types
10
- def extract_text(file):
11
- if file is None:
12
- return ""
13
- file_path = file.name
14
- if file_path.endswith(".txt"):
15
- with open(file_path, "r", encoding="utf-8") as f:
16
- return f.read()
17
- elif file_path.endswith(".pdf"):
18
- reader = PyPDF2.PdfReader(file)
19
- return "\n".join(page.extract_text() for page in reader.pages if page.extract_text())
20
- elif file_path.endswith(".docx"):
21
- return docx2txt.process(file)
 
 
 
 
 
 
 
 
 
 
 
22
  else:
23
- return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
24
-
25
- # Summarize either text input or uploaded file
26
- def summarize(text, file):
27
- extracted_text = text.strip() or extract_text(file)
28
- if not extracted_text:
29
- return "Please enter text or upload a file."
30
- summary = summarizer(extracted_text, max_length=130, min_length=30, do_sample=False)
31
- return summary[0]['summary_text']
32
-
33
- # Gradio interface
34
- iface = gr.Interface(
35
- fn=summarize,
36
- inputs=[
37
- gr.Textbox(lines=10, label="Enter text (optional)"),
38
- gr.File(label="Upload a file (.txt, .pdf, .docx)")
39
- ],
40
- outputs=gr.Textbox(label="Summary"),
41
- title="Text Summarizer",
42
- description="Paste text or upload a file to summarize using a BART model."
43
- )
44
-
45
- iface.launch()
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
 
3
  import PyPDF2
4
+ import docx
5
 
6
+ # Load the summarization pipeline
7
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
+
9
+ def read_pdf(file):
10
+ try:
11
+ reader = PyPDF2.PdfReader(file.name)
12
+ text = ""
13
+ for page in reader.pages:
14
+ text += page.extract_text() or ""
15
+ return text
16
+ except Exception:
17
+ return "Failed to read PDF."
18
+
19
+ def read_docx(file):
20
+ try:
21
+ doc = docx.Document(file.name)
22
+ return "\n".join([para.text for para in doc.paragraphs])
23
+ except Exception:
24
+ return "Failed to read Word document."
25
+
26
+ def summarize_file(pdf_file, docx_file, text_input):
27
+ if pdf_file:
28
+ text = read_pdf(pdf_file)
29
+ elif docx_file:
30
+ text = read_docx(docx_file)
31
+ elif text_input:
32
+ text = text_input
33
  else:
34
+ return "Please upload a file or enter some text."
35
+
36
+ if not text.strip():
37
+ return "No valid text found to summarize."
38
+
39
+ if len(text) > 3000:
40
+ text = text[:3000]
41
+
42
+ summary = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
43
+ return summary
44
+
45
+ with gr.Blocks() as demo:
46
+ gr.Markdown("## 🧠 Smart Text Summarizer")
47
+
48
+ with gr.Row():
49
+ pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
50
+ docx_input = gr.File(label="Upload Word File", file_types=[".docx"])
51
+
52
+ text_input = gr.Textbox(lines=8, label="Or Paste Text Here")
53
+
54
+ summarize_button = gr.Button("Summarize")
55
+
56
+ output = gr.Textbox(lines=8, label="Summary")
57
+
58
+ summarize_button.click(
59
+ summarize_file,
60
+ inputs=[pdf_input, docx_input, text_input],
61
+ outputs=output
62
+ )
63
+
64
+ demo.launch()