|
|
from typing import BinaryIO, Any |
|
|
import asyncio |
|
|
from markitdown._base_converter import DocumentConverter, DocumentConverterResult |
|
|
from markitdown._stream_info import StreamInfo |
|
|
from markitdown.converters._llm_caption import llm_caption |
|
|
from markitdown.converters._exiftool import exiftool_metadata |
|
|
|
|
|
from ._base_converter import AsyncDocumentConverterResult |
|
|
|
|
|
# MIME type prefixes handled by the image converter; the incoming mimetype is
# lower-cased and compared with str.startswith against each entry.
ACCEPTED_MIME_TYPE_PREFIXES = ["image/jpeg", "image/png"]

# File extensions (leading dot included) handled by the image converter; the
# incoming extension is lower-cased before the membership test.
ACCEPTED_FILE_EXTENSIONS = [
    ".jpg",
    ".jpeg",
    ".png",
]
|
|
|
|
|
|
|
|
class ImageConverter(DocumentConverter):
    """
    Converts images to markdown via extraction of metadata (if `exiftool` is
    installed), and description via a multimodal LLM (if an llm_client is
    configured).
    """

    # Metadata keys copied into the markdown, in the order they are emitted.
    _METADATA_FIELDS = (
        "ImageSize",
        "Title",
        "Caption",
        "Description",
        "Keywords",
        "Artist",
        "Author",
        "DateTimeOriginal",
        "CreateDate",
        "GPSPosition",
    )

    def accepts(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,
    ) -> bool:
        """Report whether the stream is a JPEG/PNG image this converter handles.

        Args:
            file_stream: The binary stream; not read by this method.
            stream_info: Supplies the `extension` and `mimetype` that are
                checked. Either may be None, which is treated as "".
            **kwargs: Ignored.

        Returns:
            True if the lower-cased extension is in ACCEPTED_FILE_EXTENSIONS or
            the lower-cased mimetype starts with one of
            ACCEPTED_MIME_TYPE_PREFIXES; False otherwise.
        """
        mimetype = (stream_info.mimetype or "").lower()
        extension = (stream_info.extension or "").lower()

        if extension in ACCEPTED_FILE_EXTENSIONS:
            return True

        # str.startswith accepts a tuple of candidates, replacing the manual loop.
        return mimetype.startswith(tuple(ACCEPTED_MIME_TYPE_PREFIXES))

    def convert(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,
    ) -> DocumentConverterResult:
        """Build markdown from the image's metadata and an optional LLM caption.

        Args:
            file_stream: The image stream, handed to `exiftool_metadata` and
                `llm_caption`.
            stream_info: Forwarded to `llm_caption`.
            **kwargs: Optional settings read here:
                `exiftool_path` (forwarded to `exiftool_metadata`),
                `llm_client` and `llm_model` (captioning runs only when both
                are not None), and `llm_prompt` (forwarded to `llm_caption`).

        Returns:
            A DocumentConverterResult whose markdown holds one "Key: value"
            line per available metadata field, followed by a "# Description:"
            section when a caption was produced. When `llm_caption` returns a
            coroutine, an AsyncDocumentConverterResult wrapping a coroutine
            that resolves to that same markdown is returned instead.
        """
        md_content = ""

        # Add metadata
        metadata = exiftool_metadata(
            file_stream, exiftool_path=kwargs.get("exiftool_path")
        )
        if metadata:
            md_content += "".join(
                f"{field}: {metadata[field]}\n"
                for field in self._METADATA_FIELDS
                if field in metadata
            )

        # Try describing the image with the configured LLM
        llm_client = kwargs.get("llm_client")
        llm_model = kwargs.get("llm_model")
        if llm_client is not None and llm_model is not None:
            llm_description = llm_caption(
                file_stream,
                stream_info,
                client=llm_client,
                model=llm_model,
                prompt=kwargs.get("llm_prompt"),
            )

            if asyncio.iscoroutine(llm_description):
                # Previously the bare caption coroutine was returned here, which
                # dropped the metadata gathered above and skipped the
                # "# Description:" wrapper used by the synchronous path.
                # NOTE(review): assumes AsyncDocumentConverterResult treats the
                # awaited value as the markdown text -- confirm against its
                # definition.
                return AsyncDocumentConverterResult(
                    self._await_description(md_content, llm_description),
                )

            md_content = self._with_description(md_content, llm_description)

        return DocumentConverterResult(
            markdown=md_content,
        )

    @staticmethod
    def _with_description(md_content: str, description: Any) -> str:
        """Append the "# Description:" section unless the description is None."""
        if description is None:
            return md_content
        return md_content + "\n# Description:\n" + description.strip() + "\n"

    @staticmethod
    async def _await_description(md_content: str, pending: Any) -> str:
        """Await a pending caption and merge it into the collected markdown."""
        return ImageConverter._with_description(md_content, await pending)