aspendse committed on
Commit
274f18d
·
verified ·
1 Parent(s): b76110c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -55
app.py CHANGED
@@ -1,68 +1,80 @@
1
- import gradio as gr
2
  from transformers import pipeline
3
  from PyPDF2 import PdfReader
4
- from docx import Document
5
  from fpdf import FPDF
6
- import os
 
7
 
8
- # Load summarization pipeline
9
# Pre-commit version: build the default summarization pipeline once, at import time.
summarizer = pipeline(task="summarization")
10
 
11
- # Function to read text from different file types
12
def extract_text(file):
    """Return the text content of an uploaded file (pre-commit, Gradio-era helper).

    ``file`` must expose ``.name`` (a path whose suffix selects the parser)
    and, for anything that is neither PDF nor DOCX, a ``.read()`` method
    returning UTF-8 encoded bytes.

    * ``.pdf``  -> the text of every page, concatenated.
    * ``.docx`` -> every paragraph, each followed by a newline.
    * other     -> the raw bytes decoded as UTF-8.
    """
    # Compare the extension case-insensitively so "REPORT.PDF" is not
    # mistaken for plain text and handed to the UTF-8 decoder.
    lowered_name = file.name.lower()

    if lowered_name.endswith(".pdf"):
        reader = PdfReader(file.name)
        # `or ""` keeps the join safe if a page yields no text at all.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if lowered_name.endswith(".docx"):
        return "".join(f"{para.text}\n" for para in Document(file.name).paragraphs)

    return file.read().decode("utf-8")
25
 
26
- # Function to summarize and return as PDF or Word
27
def summarize_file(file, output_format):
    """Summarize an uploaded file and write the summary to disk (pre-commit, Gradio-era).

    Args:
        file: Uploaded file object, passed straight to ``extract_text``.
        output_format: ``"PDF"`` writes a PDF; any other value writes a
            Word (.docx) document.

    Returns:
        ``(path, status)``: the path of the written file and ``"Success"``,
        or ``(None, <message>)`` when no text could be extracted.
    """
    text = extract_text(file)
    if not text.strip():
        return None, "File is empty or could not be read."

    # truncation=True clips the input to the model's maximum length instead of
    # failing on long documents; only the leading part is summarized then.
    result = summarizer(
        text, max_length=150, min_length=40, do_sample=False, truncation=True
    )
    summarized = result[0]["summary_text"]

    output_path = "/tmp/summary_output"
    if output_format == "PDF":
        pdf = FPDF()
        pdf.add_page()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.set_font("Arial", size=12)
        # The built-in "Arial" font only covers Latin-1; replace anything else
        # with "?" rather than letting PDF generation raise an encoding error.
        printable = summarized.encode("latin-1", "replace").decode("latin-1")
        for line in printable.split("\n"):
            pdf.multi_cell(0, 10, line)
        output_file = f"{output_path}.pdf"
        pdf.output(output_file)
    else:
        doc = Document()
        doc.add_heading("Summary", 0)
        doc.add_paragraph(summarized)
        output_file = f"{output_path}.docx"
        doc.save(output_file)

    return output_file, "Success"
 
 
 
 
 
 
52
 
53
- # Gradio interface
54
# Pre-commit Gradio front end: one file upload plus a format selector in,
# the generated summary file plus a status message out.
upload_input = gr.File(label="Upload a text, Word, or PDF file")
format_input = gr.Radio(["PDF", "Word"], label="Choose output format")
summary_output = gr.File(label="Download Summary")
status_output = gr.Textbox(label="Status")

iface = gr.Interface(
    fn=summarize_file,
    inputs=[upload_input, format_input],
    outputs=[summary_output, status_output],
    title="Smart Text Summarizer",
    description="Upload a .txt, .docx, or .pdf file and get the summary back as a Word or PDF file.",
)

# Start serving the interface.
iface.launch()
 
 
1
+ import streamlit as st
2
  from transformers import pipeline
3
  from PyPDF2 import PdfReader
4
+ import docx2txt
5
  from fpdf import FPDF
6
+ from io import BytesIO
7
+ from docx import Document
8
 
9
+ # Load summarizer
10
@st.cache_resource(show_spinner=False)
def _load_summarizer():
    """Build the DistilBART summarization pipeline.

    Streamlit re-executes the whole script on every widget interaction;
    caching the pipeline as a resource means the model is loaded once per
    server process instead of once per rerun.
    """
    return pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


# NOTE(review): show_spinner=False is meant to keep this call from emitting a
# UI element ahead of st.set_page_config() further down - confirm against the
# Streamlit version in use.
summarizer = _load_summarizer()
11
 
12
+ # Helper: Extract text from PDF
13
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Anything ``PdfReader`` accepts; the app passes the uploaded
            file object directly.

    Returns:
        One string with the pages' text joined without a separator; empty
        when no page yields any text.
    """
    reader = PdfReader(file)
    # `or ""` keeps the join safe if a page yields no text at all.
    return "".join(page.extract_text() or "" for page in reader.pages)
19
 
20
+ # Helper: Save summary to DOCX
21
def generate_docx(text):
    """Render ``text`` into a Word document held entirely in memory.

    The document consists of a "Summary" heading (level 0) followed by a
    single paragraph containing ``text``.

    Returns:
        A ``BytesIO`` holding the .docx bytes, rewound to position 0 so it
        can be read (or offered for download) immediately.
    """
    document = Document()
    document.add_heading("Summary", 0)
    document.add_paragraph(text)

    stream = BytesIO()
    document.save(stream)
    stream.seek(0)  # rewind: save() leaves the cursor at the end
    return stream
29
+
30
+ # Helper: Save summary to PDF
31
def generate_pdf(text):
    """Render ``text`` into a single-font PDF held entirely in memory.

    Each newline-separated line of ``text`` becomes one wrapped multi-cell
    in 12pt Arial.

    Returns:
        A ``BytesIO`` holding the PDF bytes, rewound to position 0.
    """
    # The built-in "Arial" font only covers Latin-1; replace anything else
    # with "?" rather than letting PDF generation raise an encoding error.
    printable = text.encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in printable.split("\n"):
        pdf.multi_cell(0, 10, line)

    # Ask for the document as an in-memory value rather than passing a stream
    # as the file name: fpdf2 returns a bytearray here, while legacy PyFPDF
    # returns a Latin-1 `str`, so normalise both to bytes.
    raw = pdf.output(dest="S")
    if isinstance(raw, str):
        raw = raw.encode("latin-1")
    return BytesIO(bytes(raw))
41
+
42
+ # App UI
43
# ---- App UI -----------------------------------------------------------------
# Flow: upload a PDF/DOCX -> preview the extracted text -> summarize on demand
# -> offer the summary as DOCX and PDF downloads.
st.set_page_config(page_title="Smart Text Summarizer", layout="centered")
st.title("📄 Smart Text Summarizer")

uploaded_file = st.file_uploader("Upload a PDF or Word file", type=["pdf", "docx"])

if uploaded_file:
    file_type = uploaded_file.name.split(".")[-1].lower()

    # Extract text
    if file_type == "pdf":
        text = extract_text_from_pdf(uploaded_file)
    elif file_type == "docx":
        text = docx2txt.process(uploaded_file)
    else:
        st.error("Unsupported file format.")
        st.stop()

    # Whitespace-only extractions are as useless to the model as empty ones.
    if not text or not text.strip():
        st.error("No readable text found in the file.")
        st.stop()

    st.subheader("🔍 Extracted Text")
    # Only mark the preview as truncated when it actually is.
    preview = text if len(text) <= 1000 else text[:1000] + "..."
    st.text_area("Preview", preview, height=200)

    # Identifies the current upload so a stored summary is never shown for a
    # different file.
    upload_key = (uploaded_file.name, uploaded_file.size)

    # Summarize
    if st.button("Summarize"):
        with st.spinner("Summarizing..."):
            # truncation=True clips the input to the model's maximum length
            # instead of failing on long documents; only the leading part of
            # the text is summarized in that case.
            result = summarizer(
                text, max_length=150, min_length=30, do_sample=False, truncation=True
            )
        # Persist the result: st.button() is only True for the single rerun
        # right after the click, and clicking a download button triggers
        # another rerun that would otherwise discard the summary.
        st.session_state["summary"] = result[0]["summary_text"]
        st.session_state["summary_for"] = upload_key

    if st.session_state.get("summary_for") == upload_key:
        summary = st.session_state["summary"]
        st.success("Summary Generated ✅")
        st.subheader("📝 Summary")
        st.write(summary)

        # Download options
        docx_file = generate_docx(summary)
        pdf_file = generate_pdf(summary)

        st.download_button("📥 Download as DOCX", docx_file, file_name="summary.docx")
        st.download_button("📥 Download as PDF", pdf_file, file_name="summary.pdf")