File size: 1,274 Bytes
4994e6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import io

import fitz
from fastapi import UploadFile
from PIL import Image, ImageEnhance, ImageFilter


class PDFPreprocessor:
    """Re-render an uploaded PDF as sharpened, contrast-boosted grayscale pages."""

    @staticmethod
    async def preprocess(file: UploadFile) -> UploadFile:
        """Return a new upload whose pages are enhanced raster images.

        Each page of the incoming PDF is rendered to a 300 dpi grayscale
        pixmap, passed through an unsharp mask and a 1.5x contrast boost,
        and placed as a full-page PNG image on a same-sized page of a
        fresh PDF.

        Args:
            file: The uploaded PDF. Its stream is read to the end and is
                not rewound.

        Returns:
            A new ``UploadFile`` backed by an in-memory buffer positioned at
            offset 0, carrying the original filename and headers.

        Raises:
            Whatever PyMuPDF or Pillow raise for unreadable input; both
            PyMuPDF documents are closed before the exception propagates.
        """
        content = await file.read()
        output_buf = io.BytesIO()

        # Context managers guarantee both documents are closed even when a
        # page fails to render or the final save raises.
        with fitz.open() as processed_doc:
            with fitz.open(stream=content, filetype="pdf") as doc:
                for page in doc:
                    pix = page.get_pixmap(dpi=300, colorspace=fitz.csGRAY)
                    # tobytes() is used with its default output format,
                    # which Pillow decodes from the in-memory stream.
                    img = Image.open(io.BytesIO(pix.tobytes()))
                    img = img.filter(
                        ImageFilter.UnsharpMask(radius=1, percent=150, threshold=3)
                    )
                    img = ImageEnhance.Contrast(img).enhance(1.5)

                    buf = io.BytesIO()
                    img.save(buf, format="PNG")

                    # Keep the original page geometry so the enhanced image
                    # fills the page exactly.
                    new_page = processed_doc.new_page(
                        width=page.rect.width, height=page.rect.height
                    )
                    new_page.insert_image(new_page.rect, stream=buf.getvalue())
            processed_doc.save(output_buf)

        # Rewind so the consumer reads the PDF from its first byte.
        output_buf.seek(0)
        return UploadFile(
            file=output_buf,
            filename=file.filename,
            headers=file.headers,
        )