aspendse committed on
Commit
a368bbc
·
verified ·
1 Parent(s): 01c48da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -71
app.py CHANGED
@@ -1,80 +1,31 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
- from PyPDF2 import PdfReader
4
- import docx2txt
5
- from fpdf import FPDF
6
- from io import BytesIO
7
- from docx import Document
8
 
9
- # Load summarizer
10
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 
 
 
11
 
12
- # Helper: Extract text from PDF
13
- def extract_text_from_pdf(file):
14
- pdf = PdfReader(file)
15
- text = ""
16
- for page in pdf.pages:
17
- text += page.extract_text()
18
- return text
19
 
20
- # Helper: Save summary to DOCX
21
- def generate_docx(text):
22
- doc = Document()
23
- doc.add_heading("Summary", 0)
24
- doc.add_paragraph(text)
25
- buffer = BytesIO()
26
- doc.save(buffer)
27
- buffer.seek(0)
28
- return buffer
29
 
30
- # Helper: Save summary to PDF
31
- def generate_pdf(text):
32
- pdf = FPDF()
33
- pdf.add_page()
34
- pdf.set_font("Arial", size=12)
35
- for line in text.split("\n"):
36
- pdf.multi_cell(0, 10, line)
37
- buffer = BytesIO()
38
- pdf.output(buffer)
39
- buffer.seek(0)
40
- return buffer
41
 
42
- # App UI
43
- st.set_page_config(page_title="Smart Text Summarizer", layout="centered")
44
- st.title("πŸ“„ Smart Text Summarizer")
45
-
46
- uploaded_file = st.file_uploader("Upload a PDF or Word file", type=["pdf", "docx"])
47
-
48
- if uploaded_file:
49
- file_type = uploaded_file.name.split(".")[-1].lower()
50
-
51
- # Extract text
52
- if file_type == "pdf":
53
- text = extract_text_from_pdf(uploaded_file)
54
- elif file_type == "docx":
55
- text = docx2txt.process(uploaded_file)
56
  else:
57
- st.error("Unsupported file format.")
58
- st.stop()
59
-
60
- if not text:
61
- st.error("No readable text found in the file.")
62
- st.stop()
63
-
64
- st.subheader("πŸ” Extracted Text")
65
- st.text_area("Preview", text[:1000] + "...", height=200)
66
-
67
- # Summarize
68
- if st.button("Summarize"):
69
  with st.spinner("Summarizing..."):
70
- summary = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
71
- st.success("Summary Generated βœ…")
72
- st.subheader("πŸ“ Summary")
73
- st.write(summary)
74
-
75
- # Download options
76
- docx_file = generate_docx(summary)
77
- pdf_file = generate_pdf(summary)
78
-
79
- st.download_button("πŸ“₯ Download as DOCX", docx_file, file_name="summary.docx")
80
- st.download_button("πŸ“₯ Download as PDF", pdf_file, file_name="summary.pdf")
 
1
  import streamlit as st
2
  from transformers import pipeline
 
 
 
 
 
3
 
4
+ # Load model (cached)
5
+ @st.cache_resource
6
+ def load_summarizer():
7
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
+ return summarizer
9
 
10
+ summarizer = load_summarizer()
 
 
 
 
 
 
11
 
12
+ # Streamlit UI
13
+ st.set_page_config(page_title="Text Summarizer", layout="centered")
14
+ st.title("πŸ“ Text Summarizer")
15
+ st.markdown("Enter a long piece of text, and this app will summarize it using a Hugging Face transformer model.")
 
 
 
 
 
16
 
17
+ # Input box
18
+ text_input = st.text_area("Enter your text here", height=300)
 
 
 
 
 
 
 
 
 
19
 
20
+ # Button to summarize
21
+ if st.button("Summarize"):
22
+ if not text_input.strip():
23
+ st.warning("⚠️ Please enter some text first.")
 
 
 
 
 
 
 
 
 
 
24
  else:
 
 
 
 
 
 
 
 
 
 
 
 
25
  with st.spinner("Summarizing..."):
26
+ try:
27
+ summary = summarizer(text_input, max_length=130, min_length=30, do_sample=False)
28
+ st.subheader("Summary:")
29
+ st.success(summary[0]['summary_text'])
30
+ except Exception as e:
31
+ st.error(f"❌ Error during summarization: {e}")