|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers.utils import logging |
|
|
from .configuration_deepseek_v2 import DeepseekV2Config |
|
|
|
|
|
# Module-level logger, obtained through the transformers logging helper
# imported at the top of this file.
logger = logging.get_logger(__name__)

# Archive map for pretrained Deepseek-OCR configurations; defined empty here.
DEEPSEEK_OCR_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
|
|
|
|
|
class DeepseekOCRConfig(DeepseekV2Config):
    """Configuration class for Deepseek-OCR.

    Subclasses ``DeepseekV2Config``, so the language-model fields handled by
    the parent (e.g. hidden_size, hidden_act, attention_bias) are inherited,
    and adds OCR / vision specific metadata on top.

    Args:
        candidate_resolutions: Stored on the instance; any falsy value
            (including ``None``) is replaced by ``[[1024, 1024]]``.
        global_view_pos: Stored unchanged on the instance.
        tile_tag: Stored unchanged on the instance.
        projector_config: Stored unchanged on the instance.
        vision_config: Stored unchanged on the instance.
        language_config: Stored unchanged on the instance. When it is a
            ``dict``, its entries are additionally forwarded to the parent
            constructor; values passed explicitly through ``**kwargs``
            override entries with the same key.
        **kwargs: Forwarded to ``DeepseekV2Config.__init__``.
    """

    model_type = "deepseekocr"

    def __init__(
        self,
        candidate_resolutions=None,
        global_view_pos="head",
        tile_tag="2D",
        projector_config=None,
        vision_config=None,
        language_config=None,
        **kwargs,
    ):
        # A dict-valued language_config seeds the parent's keyword arguments.
        # Explicit kwargs are unpacked last so they win on key collisions;
        # the caller's dict itself is never mutated.
        if isinstance(language_config, dict):
            kwargs = {**language_config, **kwargs}

        super().__init__(**kwargs)

        # OCR / vision specific metadata.
        if candidate_resolutions:
            self.candidate_resolutions = candidate_resolutions
        else:
            self.candidate_resolutions = [[1024, 1024]]
        self.global_view_pos = global_view_pos
        self.tile_tag = tile_tag

        # Sub-configurations are kept exactly as they were passed in.
        self.projector_config = projector_config
        self.vision_config = vision_config
        self.language_config = language_config

        logger.info("✅ DeepseekOCRConfig initialized (inherits DeepseekV2Config).")
|
|
|
|
|
|
|
|
|