Spaces:
Runtime error
Runtime error
| import base64 | |
| import io | |
| import pdf2image | |
| from typing import Any | |
class Preprocessor:
    """Base interface for file preprocessors.

    Subclasses override :meth:`run`; the base implementation always raises.
    """

    def run(self, file_path: str) -> Any:
        """Preprocess the file at ``file_path``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        message = "Preprocess method is not implemented"
        raise NotImplementedError(message)
| # Convert PDF to image | |
class PdfPreprocessor(Preprocessor):
    """Preprocessor that turns a PDF into base64-encoded JPEG page images."""

    def run(self, file_path: str) -> list[str]:
        """Render every page of a PDF and return the pages as base64 JPEGs.

        Args:
            file_path: Path of the PDF file passed to
                ``pdf2image.convert_from_path``.

        Returns:
            One base64 string per rendered page, in the order the pages are
            returned by ``pdf2image``. The list is empty when no pages are
            rendered.
        """
        # Render the document exactly once. The previous implementation
        # rasterized it twice and threw away a first-page-only result, which
        # also raised IndexError when the conversion produced no pages.
        pages = pdf2image.convert_from_path(file_path)
        return [self._encode_page(page) for page in pages]

    @staticmethod
    def _encode_page(page: Any) -> str:
        """Serialize one rendered page to JPEG and return it base64-encoded."""
        with io.BytesIO() as buffer:
            page.save(buffer, format="JPEG")
            jpeg_bytes = buffer.getvalue()
        return base64.b64encode(jpeg_bytes).decode("utf-8")