Spaces:

ojasrohatgi
/

Abstractive-Article_Summarizer

Sleeping

App Files Community

ojasrohatgi committed on Nov 18, 2024

Commit

06ae4c0

verified ·

1 Parent(s): ebc9977

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -53

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from reportlab.platypus import SimpleDocTemplate, Paragraph
 from reportlab.lib.enums import TA_JUSTIFY
 # Initialize the summarization pipeline
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
@@ -19,68 +20,54 @@ st.set_page_config(layout="wide")
 # Function to create PDF with justified text
 def create_pdf(text):
-    # Create a BytesIO buffer to avoid saving the PDF to disk
     pdf_buffer = BytesIO()
-    # Define the PDF document layout and page size
     doc = SimpleDocTemplate(pdf_buffer, pagesize=A4)
-    # Define a style for justified text
     styles = getSampleStyleSheet()
     justified_style = ParagraphStyle(
         name="JustifiedStyle",
         parent=styles["BodyText"],
         alignment=TA_JUSTIFY,
         fontSize=12,
-        leading=15  # Adjust line spacing as needed
     )
-    # Create a Paragraph object with justified text
     paragraph = Paragraph(text, justified_style)
-    # Build the PDF in the buffer
-    elements = [paragraph]
-    doc.build(elements)
-    # Move the buffer to the beginning so Streamlit can read it
     pdf_buffer.seek(0)
     return pdf_buffer
 # Main application
 def main():
-    st.title("Article Extractor and Summarizer")
-    # Get URL from the user
-    url = st.text_input("Share an article URL:", key="url")
-    # Define max chunk size to split article into manageable parts
     max_chunk = 300
     if url:
         try:
-            # Fetch and parse the article
             response = requests.get(url)
             response.encoding = 'utf-8'
             soup = BeautifulSoup(response.text, 'html.parser')
             results = soup.find_all(['h1', 'p'])
-            # Clean and concatenate text
             text = [html.unescape(result.get_text()) for result in results]
             article = ' '.join(text)
-            # Display the extracted article text in a scrollable window
             st.subheader("Extracted Article Content")
             st.text_area("Article", article, height=300)
             st.markdown(f"**Article Length:** {len(article)} characters")
-            # Preprocess text for chunking
             article = article.replace('.', '.<eos>').replace('?', '?<eos>').replace('!', '!<eos>')
             sentences = article.split('<eos>')
             current_chunk = 0
             chunks = [[]]
-            # Split text into manageable chunks
             for sentence in sentences:
                 if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
                     chunks[current_chunk].extend(sentence.split(' '))
@@ -88,69 +75,62 @@ def main():
                     current_chunk += 1
                     chunks.append(sentence.split(' '))
-            # Join words back to form full sentences for each chunk
             for chunk_id in range(len(chunks)):
                 chunks[chunk_id] = ' '.join(chunks[chunk_id])
-            # Streamlit progress bar, dynamic status display, and summaries list
             progress_bar = st.progress(0)
-            status_text = st.empty()  # Placeholder for dynamic status updates
             summaries = []
             start_time = time.time()
-            # Summarize each chunk and update progress
             for i, chunk in enumerate(chunks):
                 summary = summarizer(chunk, max_length=120, min_length=30, do_sample=False)
                 summaries.append(summary[0]['summary_text'])
-                # Calculate and display percentage completed and estimated time
                 percent_complete = (i + 1) / len(chunks)
                 elapsed_time = time.time() - start_time
                 estimated_total_time = elapsed_time / percent_complete
                 estimated_time_remaining = estimated_total_time - elapsed_time
-                # Update progress bar and status text
                 progress_bar.progress(percent_complete)
                 status_text.markdown(f"**Progress:** {percent_complete * 100:.2f}% - "
                                      f"**Estimated time remaining:** {estimated_time_remaining:.2f} seconds")
-            # Combine summaries into a single text output
             summary_text = ' '.join(summaries)
-            # Display the summarized text
             st.subheader("Summarized Article Content")
             st.text_area("Summary", summary_text, height=300)
             st.markdown(f"**Summary Length:** {len(summary_text)} characters")
-            # Create the PDF from the summary text with justified alignment and wrapping
             pdf_buffer = create_pdf(summary_text)
-            # Display the download button for the PDF
-            st.download_button(
-                label="Download Summary as PDF",
-                data=pdf_buffer,
-                file_name="summarized_article.pdf",
-                mime="application/pdf"
-            )
-            # Display the compression ratio
             original_length = len(article.split())
             summary_length = len(summary_text.split())
             compression_ratio = (summary_length / original_length) * 100
-            # Evaluate if the compression ratio is good or bad
             if compression_ratio < 20:
-                st.success(
-                    f"{round(compression_ratio)}% Great Compression!\nThe summary is succinct and effectively "
-                    f"highlights key points.")
             elif 20 <= compression_ratio <= 40:
-                st.info(
-                    f"{round(compression_ratio)}% Well-balanced Summary.\nIt maintains essential details while being "
-                    f"brief.")
             else:
-                st.warning(
-                    f"{round(compression_ratio)}% Compression may be excessive.\nThe summary could be too brief and "
-                    f"miss important details.")
         except Exception as e:
             st.warning(f"Error: {e}")

 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from reportlab.platypus import SimpleDocTemplate, Paragraph
 from reportlab.lib.enums import TA_JUSTIFY
+import pyttsx3
 # Initialize the summarization pipeline
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 # Function to create PDF with justified text
 def create_pdf(text):
     pdf_buffer = BytesIO()
     doc = SimpleDocTemplate(pdf_buffer, pagesize=A4)
     styles = getSampleStyleSheet()
     justified_style = ParagraphStyle(
         name="JustifiedStyle",
         parent=styles["BodyText"],
         alignment=TA_JUSTIFY,
         fontSize=12,
+        leading=15
     )
     paragraph = Paragraph(text, justified_style)
+    doc.build([paragraph])
     pdf_buffer.seek(0)
     return pdf_buffer
+# Function to read aloud the summary
+def read_aloud(text):
+    engine = pyttsx3.init()
+    engine.say(text)
+    engine.runAndWait()
 # Main application
 def main():
+    st.title("Enhanced Article Extractor and Summarizer")
+    url = st.text_input("Enter the URL of an article:", key="url")
     max_chunk = 300
     if url:
         try:
             response = requests.get(url)
             response.encoding = 'utf-8'
             soup = BeautifulSoup(response.text, 'html.parser')
             results = soup.find_all(['h1', 'p'])
             text = [html.unescape(result.get_text()) for result in results]
             article = ' '.join(text)
             st.subheader("Extracted Article Content")
             st.text_area("Article", article, height=300)
             st.markdown(f"**Article Length:** {len(article)} characters")
             article = article.replace('.', '.<eos>').replace('?', '?<eos>').replace('!', '!<eos>')
             sentences = article.split('<eos>')
             current_chunk = 0
             chunks = [[]]
             for sentence in sentences:
                 if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
                     chunks[current_chunk].extend(sentence.split(' '))
                     current_chunk += 1
                     chunks.append(sentence.split(' '))
             for chunk_id in range(len(chunks)):
                 chunks[chunk_id] = ' '.join(chunks[chunk_id])
             progress_bar = st.progress(0)
+            status_text = st.empty()
             summaries = []
             start_time = time.time()
             for i, chunk in enumerate(chunks):
                 summary = summarizer(chunk, max_length=120, min_length=30, do_sample=False)
                 summaries.append(summary[0]['summary_text'])
                 percent_complete = (i + 1) / len(chunks)
                 elapsed_time = time.time() - start_time
                 estimated_total_time = elapsed_time / percent_complete
                 estimated_time_remaining = estimated_total_time - elapsed_time
                 progress_bar.progress(percent_complete)
                 status_text.markdown(f"**Progress:** {percent_complete * 100:.2f}% - "
                                      f"**Estimated time remaining:** {estimated_time_remaining:.2f} seconds")
             summary_text = ' '.join(summaries)
             st.subheader("Summarized Article Content")
             st.text_area("Summary", summary_text, height=300)
             st.markdown(f"**Summary Length:** {len(summary_text)} characters")
             pdf_buffer = create_pdf(summary_text)
+            # Compression Ratio
             original_length = len(article.split())
             summary_length = len(summary_text.split())
             compression_ratio = (summary_length / original_length) * 100
+            st.markdown(f"### Compression Ratio: {round(compression_ratio)}%")
             if compression_ratio < 20:
+                st.success(f"Great Compression!\nThe summary is succinct and effectively highlights key points.")
             elif 20 <= compression_ratio <= 40:
+                st.info(f"Well-balanced Summary.\nIt maintains essential details while being brief.")
             else:
+                st.warning(f"Compression may be excessive.\nThe summary could be too brief and miss important details.")
+            # Display buttons in columns
+            col1, col2 = st.columns([1, 1])
+            with col1:
+                st.download_button(
+                    label="Download Summary as PDF",
+                    data=pdf_buffer,
+                    file_name="summarized_article.pdf",
+                    mime="application/pdf"
+                )
+            with col2:
+                if st.button("Read Aloud Summary"):
+                    read_aloud(summary_text)
         except Exception as e:
             st.warning(f"Error: {e}")