Nawal20 committed on
Commit
575ba46
·
verified ·
1 Parent(s): 6429958

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz # PyMuPDF for extracting text from PDF
3
+ from transformers import pipeline
4
+
5
# Load the pre-trained summarization model from Hugging Face once and reuse it.
# Building the pipeline at import time would repeat the model load on every
# execution of this script; st.cache_resource keeps a single shared instance.
@st.cache_resource
def _load_summarizer():
    """Build the Hugging Face summarization pipeline (cached by Streamlit)."""
    return pipeline("summarization")


summarizer = _load_summarizer()
7
+
8
+ # Function to extract text from PDF
9
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Either a filesystem path to a PDF, or a binary file-like
            object holding PDF data (for example the object returned by
            ``st.file_uploader``).

    Returns:
        The text of all pages, in page order, joined into a single string.
    """
    # In-memory uploads must be handed to PyMuPDF as raw bytes via ``stream``;
    # passing the file object as the filename argument does not read its data.
    # Prefer getvalue() because it ignores the current read position, so a
    # previously consumed buffer still yields the full content.
    if hasattr(pdf_file, "getvalue"):
        pdf_bytes = pdf_file.getvalue()
    elif hasattr(pdf_file, "read"):
        pdf_bytes = pdf_file.read()
    else:
        pdf_bytes = None

    if pdf_bytes is None:
        # Not file-like: treat it as a path, as the original code did.
        doc = fitz.open(pdf_file)
    else:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")

    # The context manager closes the document even if text extraction fails.
    with doc:
        return "".join(page.get_text() for page in doc)
16
+
17
# Streamlit interface: upload a PDF, preview its extracted text, and summarize
# it on demand.
PREVIEW_CHARS = 1500  # Characters of extracted text shown in the preview box
MAX_INPUT_CHARS = 1000  # Characters of extracted text passed to the summarizer

st.title("Automated Datasheet Summarizer")
st.markdown("Upload a PDF datasheet, and get a summarized version of its key points!")

# File uploader widget for the PDF file
pdf_file = st.file_uploader("Upload your datasheet PDF", type=["pdf"])

if pdf_file is not None:
    # Extract text from PDF
    with st.spinner("Extracting text from the datasheet..."):
        pdf_text = extract_text_from_pdf(pdf_file)

    if not pdf_text.strip():
        # Nothing to preview or summarize (presumably an image-only PDF with
        # no text layer), so say so instead of reporting success.
        st.warning("No extractable text was found in this PDF.")
    else:
        st.write("Text extracted successfully!")

        # Show a preview of the extracted text
        st.subheader("Extracted Text Preview:")
        st.text_area("Extracted Text", pdf_text[:PREVIEW_CHARS], height=300)

        # Summarization button
        if st.button("Summarize Text"):
            with st.spinner("Summarizing..."):
                # Only the leading portion of the text is summarized, which
                # keeps the input short for the model.
                text_to_summarize = pdf_text[:MAX_INPUT_CHARS]

                summary = summarizer(
                    text_to_summarize,
                    max_length=200,
                    min_length=50,
                    do_sample=False,
                )
                summarized_text = summary[0]["summary_text"]

            # Show the summary
            st.subheader("Summarized Text:")
            st.write(summarized_text)