import io

import fitz
from fastapi import UploadFile
from PIL import Image, ImageEnhance, ImageFilter


class PDFPreprocessor:
    """Rasterise an uploaded PDF and rebuild it from sharpened grayscale pages.

    Every page is rendered to a grayscale bitmap, sharpened with an unsharp
    mask, contrast-boosted, and placed as a full-page image into a new PDF
    whose pages have the same dimensions as the source pages.
    """

    # Rendering / enhancement parameters, kept in one place for easy tuning.
    _RENDER_DPI = 300
    _UNSHARP_RADIUS = 1
    _UNSHARP_PERCENT = 150
    _UNSHARP_THRESHOLD = 3
    _CONTRAST_FACTOR = 1.5

    @staticmethod
    def _enhance_page(page) -> bytes:
        """Render *page* to grayscale, sharpen it, and return PNG bytes."""
        pix = page.get_pixmap(
            dpi=PDFPreprocessor._RENDER_DPI, colorspace=fitz.csGRAY
        )
        # "png" is the default output format of Pixmap.tobytes(); spelled out
        # here so the hand-off to Pillow is explicit.
        img = Image.open(io.BytesIO(pix.tobytes("png")))
        img = img.filter(
            ImageFilter.UnsharpMask(
                radius=PDFPreprocessor._UNSHARP_RADIUS,
                percent=PDFPreprocessor._UNSHARP_PERCENT,
                threshold=PDFPreprocessor._UNSHARP_THRESHOLD,
            )
        )
        img = ImageEnhance.Contrast(img).enhance(
            PDFPreprocessor._CONTRAST_FACTOR
        )
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        # getvalue() returns the whole buffer regardless of stream position,
        # so no seek is needed.
        return buf.getvalue()

    @staticmethod
    async def preprocess(file: UploadFile) -> UploadFile:
        """Return a new upload containing the enhanced, image-only PDF.

        Args:
            file: The uploaded PDF. It is read in full; its stream position
                is not rewound afterwards.

        Returns:
            A new ``UploadFile`` backed by an in-memory buffer positioned at
            the start, carrying the original filename and headers.

        Any exception raised by PyMuPDF or Pillow propagates to the caller;
        both PDF documents are closed before it does.
        """
        content = await file.read()
        doc = fitz.open(stream=content, filetype="pdf")
        try:
            processed_doc = fitz.open()
            try:
                for page in doc:
                    png_bytes = PDFPreprocessor._enhance_page(page)
                    # new_page() returns the page it appended, so there is
                    # no need to look it up again via processed_doc[-1].
                    new_page = processed_doc.new_page(
                        width=page.rect.width, height=page.rect.height
                    )
                    new_page.insert_image(new_page.rect, stream=png_bytes)
                output_buf = io.BytesIO()
                processed_doc.save(output_buf)
            finally:
                # Release the output document even if rendering or saving
                # failed part-way through.
                processed_doc.close()
        finally:
            doc.close()

        output_buf.seek(0)
        return UploadFile(
            file=output_buf,
            filename=file.filename,
            headers=file.headers,
        )