Spaces:
Sleeping
Sleeping
| import fitz # PyMuPDF | |
| import docx | |
| """ | |
| def extract_text_from_pdf(pdf_file): | |
| text = "" | |
| with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc: | |
| for page in doc: | |
| text += page.get_text() | |
| return text | |
| """ | |
def extract_text_from_pdf(file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        file: Object exposing ``read()``; the bytes it returns are handed
            to PyMuPDF as an in-memory PDF stream.

    Returns:
        The ``get_text()`` output of each page, joined in page order with
        no additional separator.
    """
    # Open through a context manager so the document is closed even when
    # text extraction raises; it was previously left open.
    with fitz.open(stream=file.read(), filetype="pdf") as pdf_document:
        # Iterating the document yields its pages in order.
        return "".join(page.get_text() for page in pdf_document)
def extract_text_from_word(doc_file):
    """Return the text of a Word document, one paragraph per line.

    Args:
        doc_file: Passed unchanged to ``docx.Document`` (python-docx
            documents this as a path or a file-like object).

    Returns:
        The ``text`` of every entry in ``doc.paragraphs``, joined with
        newlines; an empty string when there are no paragraphs. Only
        ``doc.paragraphs`` is read here.
    """
    doc = docx.Document(doc_file)
    # Separate paragraphs with a newline: concatenating them directly fused
    # the last word of one paragraph with the first word of the next.
    return "\n".join(para.text for para in doc.paragraphs)