Spaces:
Sleeping
Sleeping
File size: 664 Bytes
db7c35f f93ed5e db7c35f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | import PyPDF2
import docx
def extract_text_from_file(uploaded_file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Dispatches on the extension of ``uploaded_file.name``. The comparison is
    case-insensitive, so ``REPORT.PDF`` and ``Notes.Docx`` are handled the
    same way as their lowercase spellings.

    Args:
        uploaded_file: Object exposing a ``name`` attribute (a string); it is
            passed unchanged to the format-specific extractor.

    Returns:
        The extracted text, or the string ``"Unsupported file format."`` when
        the extension is neither ``.pdf`` nor ``.docx``.
    """
    # Normalize case once so extensions such as ".PDF" are still recognized.
    filename = uploaded_file.name.lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(uploaded_file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(uploaded_file)
    return "Unsupported file format."
def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, concatenated.

    Args:
        uploaded_file: PDF source handed directly to ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages joined with no separator. Pages for which
        ``extract_text()`` yields a falsy value (``None`` or an empty
        string) are skipped.
    """
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    # Call extract_text() exactly once per page and filter on its result,
    # so no page is processed twice.
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    return "".join(text for text in page_texts if text)
def extract_text_from_docx(uploaded_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        uploaded_file: DOCX source handed directly to ``docx.Document``.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``, joined
        with newline characters.
    """
    paragraphs = docx.Document(uploaded_file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)
|