Spaces:
Runtime error
Runtime error
| import os | |
| import re | |
| from app.parser.parsers.pdf import PdfParser | |
| from app.parser.parsers.txt import TxtParser | |
| from app.parser.parsers.docx import DocxParser | |
| from tests.parser.data import dummy_docx, dummy_pdf, dummy_txt | |
def standarize(text):
    """Return *text* with each whitespace run collapsed to one space.

    Leading and trailing whitespace is removed from the result, so two
    strings that differ only in spacing or line breaks compare equal.
    """
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
def test_pdf_parser():
    """Check that PdfParser.parse recovers the text given to dummy_pdf.

    The file at the path returned by ``dummy_pdf(text)`` is parsed, and the
    result is compared to the input after both pass through ``standarize``,
    so the comparison ignores differences in whitespace.
    """
    # NOTE: adjacent literals are concatenated with no separating space
    # (e.g. "tempor" + "incididunt"); kept byte-for-byte so the fixture
    # text is unchanged.
    text = (
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor"
        "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud"
        "exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat."
    )
    temp_file_path = dummy_pdf(text)
    try:
        parsed_text = PdfParser.parse(temp_file_path)
    finally:
        # Delete the generated file even when parsing raises, so a failing
        # parser does not leave stray files behind.
        os.remove(temp_file_path)
    assert standarize(parsed_text) == standarize(text)
def test_txt_parser():
    """Check that TxtParser.parse recovers the text given to dummy_txt.

    The file at the path returned by ``dummy_txt(text)`` is parsed, and the
    result is compared to the input after both pass through ``standarize``,
    so the comparison ignores differences in whitespace.
    """
    # NOTE: adjacent literals are concatenated with no separating space
    # (e.g. "tempor" + "incididunt"); kept byte-for-byte so the fixture
    # text is unchanged.
    text = (
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor"
        "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud"
        "exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat."
    )
    temp_file_path = dummy_txt(text)
    try:
        parsed_text = TxtParser.parse(temp_file_path)
    finally:
        # Delete the generated file even when parsing raises, so a failing
        # parser does not leave stray files behind.
        os.remove(temp_file_path)
    assert standarize(parsed_text) == standarize(text)
def test_docx_parser():
    """Check that DocxParser.parse recovers the text given to dummy_docx.

    The file at the path returned by ``dummy_docx(text)`` is parsed, and the
    result is compared to the input after both pass through ``standarize``,
    so the comparison ignores differences in whitespace.
    """
    # NOTE: adjacent literals are concatenated with no separating space
    # (e.g. "tempor" + "incididunt"); kept byte-for-byte so the fixture
    # text is unchanged.
    text = (
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor"
        "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud"
        "exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat."
    )
    temp_file_path = dummy_docx(text)
    try:
        parsed_text = DocxParser.parse(temp_file_path)
    finally:
        # Delete the generated file even when parsing raises, so a failing
        # parser does not leave stray files behind.
        os.remove(temp_file_path)
    assert standarize(parsed_text) == standarize(text)