| | from __future__ import annotations |
| |
|
| | import os |
| | from ctypes import ( |
| | c_bool, |
| | c_char_p, |
| | c_int, |
| | c_uint8, |
| | c_uint32, |
| | c_float, |
| | c_void_p, |
| | c_size_t, |
| | POINTER, |
| | _Pointer, |
| | Structure, |
| | byref, |
| | ) |
| | import pathlib |
| | from typing import ( |
| | Union, |
| | NewType, |
| | Optional, |
| | TYPE_CHECKING, |
| | ) |
| |
|
| | import llama_cpp.llama_cpp as llama_cpp |
| |
|
| | from llama_cpp._ctypes_extensions import ( |
| | load_shared_library, |
| | ctypes_function_for_shared_library, |
| | ) |
| |
|
| | if TYPE_CHECKING: |
| | from llama_cpp._ctypes_extensions import ( |
| | CtypesArray, |
| | ) |
| |
|
| |
|
| | |
# Locate and load the mtmd shared library, then derive the decorator that the
# function bindings in this module use to attach ctypes signatures.
_libmtmd_base_name = "mtmd"
_libmtmd_override_path = os.environ.get("MTMD_CPP_LIB")
# Default location is the "lib" directory that sits next to this module.
# NOTE(review): when MTMD_CPP_LIB is set, only its *presence* matters here:
# the value is never used and the search path becomes pathlib.Path() (the
# current working directory). Confirm whether the override value was meant to
# be passed through before changing this; behaviour is intentionally kept.
_libmtmd_base_path = (
    pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
    if _libmtmd_override_path is None
    else pathlib.Path()
)

# Load the shared library by base name from the chosen directory.
_libmtmd = load_shared_library(_libmtmd_base_name, _libmtmd_base_path)

# Decorator factory bound to the loaded library.
ctypes_function = ctypes_function_for_shared_library(_libmtmd)
| |
|
| | |
| | |
| | |
| |
|
| | |
# Opaque handle types.
#
# Each handle has two names: a ``NewType`` over ``int`` used purely in type
# hints, and a ``*_ctypes`` alias (always ``c_void_p``) used when declaring
# argtypes/restype for the C functions.
mtmd_context_p = NewType("mtmd_context_p", int)
mtmd_context_p_ctypes = c_void_p

mtmd_bitmap_p = NewType("mtmd_bitmap_p", int)
mtmd_bitmap_p_ctypes = c_void_p

mtmd_image_tokens_p = NewType("mtmd_image_tokens_p", int)
mtmd_image_tokens_p_ctypes = c_void_p

mtmd_input_chunk_p = NewType("mtmd_input_chunk_p", int)
mtmd_input_chunk_p_ctypes = c_void_p

mtmd_input_chunks_p = NewType("mtmd_input_chunks_p", int)
mtmd_input_chunks_p_ctypes = c_void_p
| |
|
| | |
# Integer tags for the kinds of input chunk.
# NOTE(review): presumably these mirror the C enum whose value is returned by
# mtmd_input_chunk_get_type; confirm the numbering against mtmd.h.
MTMD_INPUT_CHUNK_TYPE_TEXT = 0
MTMD_INPUT_CHUNK_TYPE_IMAGE = 1
MTMD_INPUT_CHUNK_TYPE_AUDIO = 2
| |
|
| | |
class mtmd_context_params(Structure):
    """ctypes layout of the parameter struct passed by value to ``mtmd_init_from_file``.

    Field order and types determine the binary layout, so they must stay in
    step with the C declaration (NOTE(review): verify against the mtmd.h of
    the library build actually being loaded).
    """

    _fields_ = [
        ("use_gpu", c_bool),
        ("print_timings", c_bool),
        ("n_threads", c_int),
        # Declared as a plain C int here; presumably a log-level enum on the
        # C side (confirm against mtmd.h).
        ("verbosity", c_int),
        # Both markers are C strings; an unset field reads back as None.
        ("image_marker", c_char_p),
        ("media_marker", c_char_p),
    ]
| |
|
class mtmd_input_text(Structure):
    """ctypes layout of the text-input struct handed to ``mtmd_tokenize`` by pointer.

    Field order and types determine the binary layout and must match the C
    declaration.
    """

    _fields_ = [
        # C string holding the text; an unset field reads back as None.
        ("text", c_char_p),
        # NOTE(review): presumably tokenizer flags forwarded to the C side;
        # their exact effect is defined in mtmd.h, not here.
        ("add_special", c_bool),
        ("parse_special", c_bool),
    ]
| |
|
| | |
| | |
| | |
| |
|
| | |
@ctypes_function("mtmd_default_marker", [], c_char_p)
def mtmd_default_marker() -> bytes:
    """Binding for the C symbol ``mtmd_default_marker``.

    Takes no arguments. The result type is declared as ``c_char_p``, which
    ctypes converts to Python ``bytes``.
    """
    ...
| |
|
| | |
@ctypes_function("mtmd_context_params_default", [], mtmd_context_params)
def mtmd_context_params_default() -> mtmd_context_params:
    """Binding for the C symbol ``mtmd_context_params_default``.

    Takes no arguments and returns an ``mtmd_context_params`` struct by value.
    """
    ...
| |
|
| | |
| | |
| | |
@ctypes_function(
    "mtmd_init_from_file",
    [c_char_p, llama_cpp.llama_model_p_ctypes, mtmd_context_params],
    mtmd_context_p_ctypes,
)
def mtmd_init_from_file(
    mmproj_fname: bytes,
    text_model: llama_cpp.llama_model_p,
    ctx_params: mtmd_context_params,
    /,
) -> Optional[mtmd_context_p]:
    """Binding for the C symbol ``mtmd_init_from_file``.

    Args:
        mmproj_fname: File name passed as a C string (``bytes``).
        text_model: llama model handle.
        ctx_params: Parameter struct, passed by value.

    Returns:
        A context handle. The result type is ``c_void_p``, so a NULL pointer
        from C arrives in Python as ``None``.
    """
    ...
| |
|
| | |
@ctypes_function("mtmd_free", [mtmd_context_p_ctypes], None)
def mtmd_free(ctx: mtmd_context_p, /) -> None:
    """Binding for the C symbol ``mtmd_free``.

    Args:
        ctx: Context handle.

    The C function is declared with no return value.
    """
    ...
| |
|
| | |
@ctypes_function("mtmd_support_vision", [mtmd_context_p_ctypes], c_bool)
def mtmd_support_vision(ctx: mtmd_context_p, /) -> bool:
    """Binding for the C symbol ``mtmd_support_vision``.

    Args:
        ctx: Context handle.

    Returns:
        The C ``bool`` result as a Python ``bool``.
    """
    ...
| |
|
| | |
@ctypes_function(
    "mtmd_bitmap_init",
    [c_uint32, c_uint32, POINTER(c_uint8)],
    mtmd_bitmap_p_ctypes,
)
def mtmd_bitmap_init(
    nx: Union[c_uint32, int],
    ny: Union[c_uint32, int],
    data: CtypesArray[c_uint8],
    /,
) -> Optional[mtmd_bitmap_p]:
    """Binding for the C symbol ``mtmd_bitmap_init``.

    Args:
        nx: First dimension, unsigned 32-bit.
        ny: Second dimension, unsigned 32-bit.
        data: Pointer/array of ``uint8`` values.
            NOTE(review): the expected buffer length and pixel layout are
            defined by the C library; confirm against mtmd.h.

    Returns:
        A bitmap handle, or ``None`` when C returns NULL (``c_void_p``
        result type).
    """
    ...
| |
|
| | |
@ctypes_function("mtmd_bitmap_free", [mtmd_bitmap_p_ctypes], None)
def mtmd_bitmap_free(bitmap: mtmd_bitmap_p, /) -> None:
    """Binding for the C symbol ``mtmd_bitmap_free``.

    Args:
        bitmap: Bitmap handle.

    The C function is declared with no return value.
    """
    ...
| |
|
| | |
@ctypes_function("mtmd_input_chunks_init", [], mtmd_input_chunks_p_ctypes)
def mtmd_input_chunks_init() -> Optional[mtmd_input_chunks_p]:
    """Binding for the C symbol ``mtmd_input_chunks_init``.

    Takes no arguments.

    Returns:
        A chunk-list handle, or ``None`` when C returns NULL (``c_void_p``
        result type).
    """
    ...
| |
|
| | |
@ctypes_function("mtmd_input_chunks_free", [mtmd_input_chunks_p_ctypes], None)
def mtmd_input_chunks_free(chunks: mtmd_input_chunks_p, /) -> None:
    """Binding for the C symbol ``mtmd_input_chunks_free``.

    Args:
        chunks: Chunk-list handle.

    The C function is declared with no return value.
    """
    ...
| |
|
| | |
@ctypes_function("mtmd_input_chunks_size", [mtmd_input_chunks_p_ctypes], c_size_t)
def mtmd_input_chunks_size(chunks: mtmd_input_chunks_p, /) -> int:
    """Binding for the C symbol ``mtmd_input_chunks_size``.

    Args:
        chunks: Chunk-list handle.

    Returns:
        The ``size_t`` result as a Python ``int``.
    """
    ...
| |
|
| | |
@ctypes_function(
    "mtmd_input_chunks_get",
    [mtmd_input_chunks_p_ctypes, c_size_t],
    mtmd_input_chunk_p_ctypes,
)
def mtmd_input_chunks_get(
    chunks: mtmd_input_chunks_p,
    idx: Union[c_size_t, int],
    /,
) -> Optional[mtmd_input_chunk_p]:
    """Binding for the C symbol ``mtmd_input_chunks_get``.

    Args:
        chunks: Chunk-list handle.
        idx: Index, passed as ``size_t``.

    Returns:
        A single-chunk handle, or ``None`` when C returns NULL (``c_void_p``
        result type).
    """
    ...
| |
|
| | |
| | |
| | |
| | |
| | |
@ctypes_function(
    "mtmd_tokenize",
    [
        mtmd_context_p_ctypes,
        mtmd_input_chunks_p_ctypes,
        POINTER(mtmd_input_text),
        POINTER(mtmd_bitmap_p_ctypes),
        c_size_t,
    ],
    c_int,
)
def mtmd_tokenize(
    ctx: mtmd_context_p,
    output: mtmd_input_chunks_p,
    text: "_Pointer[mtmd_input_text]",
    bitmaps: CtypesArray[mtmd_bitmap_p_ctypes],
    n_bitmaps: Union[c_size_t, int],
    /,
) -> int:
    """Binding for the C symbol ``mtmd_tokenize``.

    Args:
        ctx: Context handle.
        output: Chunk-list handle.
            NOTE(review): presumably filled by the C call; confirm in mtmd.h.
        text: Pointer to an ``mtmd_input_text`` struct.
        bitmaps: Array of bitmap handles (pointer to ``c_void_p``).
        n_bitmaps: Count passed as ``size_t`` alongside ``bitmaps``.

    Returns:
        The C ``int`` status code. The meaning of individual values is
        defined by the C library and is not visible in this module.
    """
    ...
| |
|
| | |
@ctypes_function(
    "mtmd_input_chunk_get_n_tokens",
    [mtmd_input_chunk_p_ctypes],
    c_size_t,
)
def mtmd_input_chunk_get_n_tokens(chunk: mtmd_input_chunk_p, /) -> int:
    """Binding for the C symbol ``mtmd_input_chunk_get_n_tokens``.

    Args:
        chunk: Single-chunk handle.

    Returns:
        The ``size_t`` result as a Python ``int``.
    """
    ...
| |
|
| | |
@ctypes_function(
    "mtmd_input_chunk_get_type",
    [mtmd_input_chunk_p_ctypes],
    c_int,
)
def mtmd_input_chunk_get_type(chunk: mtmd_input_chunk_p, /) -> int:
    """Binding for the C symbol ``mtmd_input_chunk_get_type``.

    Args:
        chunk: Single-chunk handle.

    Returns:
        The C ``int`` result.
        NOTE(review): presumably one of the ``MTMD_INPUT_CHUNK_TYPE_*``
        values defined in this module; confirm against mtmd.h.
    """
    ...
| |
|
| | |
@ctypes_function(
    "mtmd_input_chunk_get_tokens_text",
    [mtmd_input_chunk_p_ctypes, POINTER(c_size_t)],
    POINTER(llama_cpp.llama_token),
)
def mtmd_input_chunk_get_tokens_text(
    chunk: mtmd_input_chunk_p,
    n_tokens_output: "_Pointer[c_size_t]",
    /,
) -> Optional["_Pointer[llama_cpp.llama_token]"]:
    """Binding for the C symbol ``mtmd_input_chunk_get_tokens_text``.

    Args:
        chunk: Single-chunk handle.
        n_tokens_output: Pointer to a ``size_t``.
            NOTE(review): presumably an out-parameter receiving the number of
            tokens behind the returned pointer; confirm in mtmd.h.

    Returns:
        A pointer to ``llama_token`` values.
        NOTE(review): ownership and lifetime of the pointed-to memory are
        defined by the C library; confirm before keeping the pointer around.
    """
    ...
| |
|
| | |
| | |
| | |
| |
|
| | |
@ctypes_function(
    "mtmd_helper_bitmap_init_from_buf",
    [mtmd_context_p_ctypes, POINTER(c_uint8), c_size_t],
    mtmd_bitmap_p_ctypes,
)
def mtmd_helper_bitmap_init_from_buf(
    ctx: mtmd_context_p,
    buf: CtypesArray[c_uint8],
    length: Union[c_size_t, int],
    /,
) -> Optional[mtmd_bitmap_p]:
    """Binding for the C symbol ``mtmd_helper_bitmap_init_from_buf``.

    Args:
        ctx: Context handle.
        buf: Pointer/array of ``uint8`` values.
        length: Size passed as ``size_t`` alongside ``buf``.

    Returns:
        A bitmap handle, or ``None`` when C returns NULL (``c_void_p``
        result type).
    """
    ...
| |
|
| | |
@ctypes_function(
    "mtmd_helper_get_n_tokens",
    [mtmd_input_chunks_p_ctypes],
    c_size_t,
)
def mtmd_helper_get_n_tokens(chunks: mtmd_input_chunks_p, /) -> int:
    """Binding for the C symbol ``mtmd_helper_get_n_tokens``.

    Args:
        chunks: Chunk-list handle.

    Returns:
        The ``size_t`` result as a Python ``int``.
    """
    ...
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
@ctypes_function(
    "mtmd_helper_eval_chunk_single",
    [
        mtmd_context_p_ctypes,
        llama_cpp.llama_context_p_ctypes,
        mtmd_input_chunk_p_ctypes,
        llama_cpp.llama_pos,
        llama_cpp.llama_seq_id,
        c_int,
        c_bool,
        POINTER(llama_cpp.llama_pos),
    ],
    c_int,
)
def mtmd_helper_eval_chunk_single(
    ctx: mtmd_context_p,
    lctx: llama_cpp.llama_context_p,
    chunk: mtmd_input_chunk_p,
    n_past: llama_cpp.llama_pos,
    seq_id: llama_cpp.llama_seq_id,
    n_batch: Union[c_int, int],
    logits_last: Union[c_bool, bool],
    new_n_past: "_Pointer[llama_cpp.llama_pos]",
    /,
) -> int:
    """Binding for the C symbol ``mtmd_helper_eval_chunk_single``.

    Args:
        ctx: mtmd context handle.
        lctx: llama context handle.
        chunk: Single-chunk handle.
        n_past: Position value (``llama_pos``).
        seq_id: Sequence id (``llama_seq_id``).
        n_batch: Batch size, passed as C ``int``.
        logits_last: Flag, passed as C ``bool``.
        new_n_past: Pointer to a ``llama_pos``.
            NOTE(review): presumably an out-parameter updated by the C call;
            confirm in mtmd-helper.h.

    Returns:
        The C ``int`` status code. The meaning of individual values is
        defined by the C library and is not visible in this module.
    """
    ...
| |
|