Spaces:
Build error
Build error
| """ | |
| Script for processing an input CV document | |
| """ | |
| import io | |
| import fitz | |
| from docx import Document | |
def parse_pdf(pdf_file) -> str:
    """Extract the plain text of every page of an in-memory PDF.

    Args:
        pdf_file: In-memory PDF content handed to PyMuPDF as a stream
            (presumably the data obtained from Streamlit's file uploader --
            verify against the caller).

    Returns:
        The text of all pages, in page order, separated by blank lines.
    """
    # The context manager guarantees the document is closed even when text
    # extraction fails part-way through.
    with fitz.open(stream=pdf_file, filetype="pdf") as pdf_document:
        all_text = [page.get_text("text") for page in pdf_document]
    return "\n\n".join(all_text)
def parse_docx(docx_file) -> str:
    """Extract the paragraph text of an in-memory Word document.

    Args:
        docx_file: Bytes-like content of the .docx file; it is wrapped in an
            ``io.BytesIO`` buffer before being handed to ``Document``.

    Returns:
        The text of each entry in the document's ``paragraphs``, joined with
        newlines.
    """
    buffer = io.BytesIO(docx_file)
    paragraphs = Document(buffer).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)