kobiakor15's picture
Upload oculus_unified_model/__init__.py with huggingface_hub
7cefab8 verified
"""
Oculus Unified Vision-Language Model.

A HuggingFace-compatible multimodal model combining:
- DINOv3 (vision encoder)
- SigLIP2 (vision encoder)
- Trained Projector (vision-to-language bridge)
- LLM (language generation)

Supports:
- Image captioning
- Visual question answering
- Object detection (Box mode)
- Point detection (counting)
- Polygon segmentation
- Optional reasoning with thinking traces
"""
from .modeling_oculus import (
OculusForConditionalGeneration,
OculusVisionEncoder,
OculusProjector,
)
from .configuration_oculus import OculusConfig
from .processing_oculus import OculusProcessor
# Public API: the names exported by a star-import of this package.
__all__ = [
    # Imported from .modeling_oculus
    "OculusForConditionalGeneration",
    "OculusVisionEncoder",
    "OculusProjector",
    # Imported from .configuration_oculus
    "OculusConfig",
    # Imported from .processing_oculus
    "OculusProcessor",
]

# Package version string.
__version__ = "0.2.0"