Spaces:

Nawal20
/

Datasheet_Summerizer

Build error

App Files Files Community

Nawal20 committed on Jan 1, 2025

Commit

575ba46

verified ·

1 Parent(s): 6429958

Create app.py

Browse files

Files changed (1) hide show

app.py +46 -0

app.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import streamlit as st
+import fitz  # PyMuPDF for extracting text from PDF
+from transformers import pipeline
+# Load pre-trained summarization model from Hugging Face
+summarizer = pipeline("summarization")
+# Function to extract text from PDF
+def extract_text_from_pdf(pdf_file):
+    doc = fitz.open(pdf_file)
+    text = ""
+    for page_num in range(len(doc)):
+        page = doc.load_page(page_num)
+        text += page.get_text()
+    return text
+# Streamlit interface
+st.title("Automated Datasheet Summarizer")
+st.markdown("Upload a PDF datasheet, and get a summarized version of its key points!")
+# File uploader widget for the PDF file
+pdf_file = st.file_uploader("Upload your datasheet PDF", type=["pdf"])
+if pdf_file is not None:
+    # Extract text from PDF
+    with st.spinner("Extracting text from the datasheet..."):
+        pdf_text = extract_text_from_pdf(pdf_file)
+        st.write("Text extracted successfully!")
+    # Show a preview of the extracted text
+    st.subheader("Extracted Text Preview:")
+    st.text_area("Extracted Text", pdf_text[:1500], height=300)  # Show first 1500 chars
+    # Summarization button
+    if st.button("Summarize Text"):
+        with st.spinner("Summarizing..."):
+            # If the extracted text is too long, truncate it for summarization
+            max_input_length = 1000  # Max length for summarizer input
+            text_to_summarize = pdf_text[:max_input_length]
+            summary = summarizer(text_to_summarize, max_length=200, min_length=50, do_sample=False)
+            summarized_text = summary[0]["summary_text"]
+            # Show the summary
+            st.subheader("Summarized Text:")
+            st.write(summarized_text)