File size: 522 Bytes
6085b61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import fitz

from docstream.core.extractor_v2 import ExtractionError


class PDFExtractor:
    """Open a PDF through PyMuPDF, unlocking it with a password if needed.

    The constructor stores its arguments on the instance and keeps the
    opened ``fitz.Document`` in ``self.doc``.
    """

    def __init__(self, file_path, password=None):
        """Open ``file_path`` and authenticate when the document is encrypted.

        Args:
            file_path: Passed unchanged to ``fitz.Document``.
            password: Password used to unlock an encrypted document. A falsy
                value (``None`` or ``""``) is treated as "no password given".

        Raises:
            ExtractionError: If the document is encrypted and no password was
                supplied, or if the supplied password is rejected.
        """
        self.file_path = file_path
        self.password = password
        self.doc = fitz.Document(file_path)

        if not self.doc.is_encrypted:
            return

        if not self.password:
            # Release the file handle; the half-built extractor is unusable.
            self.doc.close()
            raise ExtractionError(
                "PDF is password protected. Pass password= to extract()"
            )

        # authenticate() reports failure via a zero return value rather than
        # an exception, so the result must be checked explicitly; otherwise a
        # wrong password would only surface later when pages are accessed.
        if not self.doc.authenticate(self.password):
            self.doc.close()
            raise ExtractionError("Incorrect password for PDF")