Spaces:
Sleeping
Sleeping
| import PyPDF2 | |
| import docx | |
def extract_text_from_file(uploaded_file):
    """Extract text from an uploaded PDF or DOCX file, chosen by its name.

    Args:
        uploaded_file: Object exposing a ``name`` attribute. It is handed
            unchanged to the format-specific extractor. (Presumably a
            file-like upload object -- confirm against the caller.)

    Returns:
        The text produced by the matching extractor, or the string
        "Unsupported file format." when the name ends in neither ".pdf"
        nor ".docx".
    """
    # Compare case-insensitively so names such as "REPORT.PDF" or "cv.Docx"
    # are routed to an extractor instead of being rejected.
    filename = uploaded_file.name.lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(uploaded_file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(uploaded_file)
    return "Unsupported file format."
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        uploaded_file: Source passed directly to ``PyPDF2.PdfReader``.
            (Presumably a path or binary file-like object -- confirm
            against the caller.)

    Returns:
        The text of all pages joined with no separator. Pages whose
        extraction yields an empty or falsy result are skipped.
    """
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    # Extract each page exactly once; the result is reused for both the
    # emptiness check and the join instead of parsing the page twice.
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    return "".join(text for text in page_texts if text)
def extract_text_from_docx(uploaded_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        uploaded_file: Source passed directly to ``docx.Document``.
            (Presumably a path or binary file-like object -- confirm
            against the caller.)

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``,
        joined with newline characters.
    """
    document = docx.Document(uploaded_file)
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)