auto-swe-agent-ui / docstream /core /extractor.py
DevilBits's picture
fix: enforce safe empty bounds for tracking data charts and match dataframe list alignments
6085b61
raw
history blame contribute delete
522 Bytes
import fitz
from docstream.core.extractor_v2 import ExtractionError
class PDFExtractor:
    """Open a PDF through PyMuPDF and unlock it when it is encrypted.

    After successful construction the instance exposes ``file_path``,
    ``password`` and ``doc`` (the opened ``fitz.Document``).
    """

    def __init__(self, file_path, password=None):
        """Open *file_path* and authenticate if the document is encrypted.

        Args:
            file_path: Location of the PDF, passed straight to
                ``fitz.Document``.
            password: Optional password tried when the document reports
                itself as encrypted. A falsy value (``None`` or ``""``)
                is treated as "no password supplied".

        Raises:
            ExtractionError: If the document is encrypted and no password
                was supplied, or if the supplied password is rejected.
        """
        self.file_path = file_path
        self.password = password
        self.doc = fitz.Document(file_path)

        if not self.doc.is_encrypted:
            return

        if not self.password:
            # Release the file handle; the half-built extractor is unusable.
            self.doc.close()
            raise ExtractionError(
                "PDF is password protected. Pass password= to extract()"
            )

        # authenticate() signals a wrong password through a falsy return
        # value rather than an exception, so the result must be checked;
        # otherwise a locked document would be handed to later stages.
        if not self.doc.authenticate(self.password):
            self.doc.close()
            raise ExtractionError(
                "PDF password is incorrect. Could not unlock the document."
            )