Update core/pdf_utils.py
Browse files- core/pdf_utils.py +4 -3
core/pdf_utils.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
|
|
| 1 |
import io, shutil
|
| 2 |
from typing import List
|
| 3 |
from pdf2image import convert_from_path
|
| 4 |
import pdfplumber
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
def pdf_to_images(pdf_path: str, dpi: int = 220, max_pages: int = 6) -> List[bytes]:
|
| 7 |
-
if not shutil.which("pdftoppm"):
|
| 8 |
-
# 環境になくても UI は出す。上位でテキスト抽出にフォールバックさせる。
|
| 9 |
-
raise RuntimeError("pdftoppm(poppler-utils)が見つかりません。画像化はスキップします。")
|
| 10 |
pages = convert_from_path(pdf_path, dpi=dpi, fmt="png")
|
| 11 |
out: List[bytes] = []
|
| 12 |
for i, p in enumerate(pages):
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
import io, shutil
|
| 3 |
from typing import List
|
| 4 |
from pdf2image import convert_from_path
|
| 5 |
import pdfplumber
|
| 6 |
|
| 7 |
+
def has_poppler() -> bool:
    """Report whether the poppler command-line tools are on PATH.

    Returns:
        True only when both the ``pdftoppm`` and ``pdftocairo``
        executables can be located with ``shutil.which``; False otherwise.
    """
    required_tools = ("pdftoppm", "pdftocairo")
    # all() stops at the first missing tool, so the lookup order matches
    # a plain left-to-right `and` chain.
    return all(shutil.which(tool) for tool in required_tools)
|
| 9 |
+
|
| 10 |
def pdf_to_images(pdf_path: str, dpi: int = 220, max_pages: int = 6) -> List[bytes]:
|
|
|
|
|
|
|
|
|
|
| 11 |
pages = convert_from_path(pdf_path, dpi=dpi, fmt="png")
|
| 12 |
out: List[bytes] = []
|
| 13 |
for i, p in enumerate(pages):
|