# Scraped from a Hugging Face Spaces file view (Space status: Sleeping; file size: 840 bytes).
import PyPDF2
import io
import streamlit as st
def extract_text_from_pdf(pdf_file):
    """
    Extract the text of every page of a PDF.

    Args:
        pdf_file: The PDF source, passed straight to ``PyPDF2.PdfReader``
            (the caller in this file supplies an ``io.BytesIO``).

    Returns:
        The concatenated text of all pages, each page followed by two
        newline characters, or ``None`` if reading or extraction failed.
        On failure the error is reported to the user through ``st.error``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # A page without extractable text contributes an empty string rather
        # than raising on ``None + str`` and discarding the whole document.
        page_texts = [
            (page.extract_text() or "") + "\n\n" for page in pdf_reader.pages
        ]
        return "".join(page_texts)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any parsing failure
        # is shown to the user instead of crashing the app.
        st.error(f"Error extracting text from PDF: {e}")
        return None
def get_document_text(uploaded_file, file_type):
    """
    Get text from an uploaded document (PDF or TXT).

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the file's
            raw bytes.
        file_type: Selected file-type label. ``"Text (.txt)"`` selects the
            plain-text path; any other value is treated as a PDF.

    Returns:
        The document text. For PDFs this is whatever
        ``extract_text_from_pdf`` returns.

    Raises:
        UnicodeDecodeError: If a text file is not valid UTF-8.
    """
    raw_bytes = uploaded_file.getvalue()

    if file_type == "Text (.txt)":
        # "utf-8-sig" decodes ordinary UTF-8 identically but also strips a
        # leading byte-order mark, so text saved as "UTF-8 with BOM" does not
        # start with a stray U+FEFF character.
        return raw_bytes.decode("utf-8-sig")

    # Everything else is handled as a PDF; wrap the bytes in a seekable
    # in-memory stream for the PDF reader.
    return extract_text_from_pdf(io.BytesIO(raw_bytes))