File size: 791 Bytes
7cefab8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
"""
Oculus Unified Vision-Language Model
A HuggingFace-compatible multimodal model combining:
- DINOv3 (vision encoder)
- SigLIP2 (vision encoder)
- Trained Projector (vision-to-language bridge)
- LLM (language generation)
Supports:
- Image captioning
- Visual question answering
- Object detection (Box mode)
- Point detection (counting)
- Polygon segmentation
- Optional reasoning with thinking traces
"""
from .modeling_oculus import (
OculusForConditionalGeneration,
OculusVisionEncoder,
OculusProjector,
)
from .configuration_oculus import OculusConfig
from .processing_oculus import OculusProcessor
# Public API of this module: exactly the five names imported above from the
# sibling modules (modeling, configuration, processing). This list is what a
# star-import of this module exposes.
__all__ = [
"OculusForConditionalGeneration",
"OculusVisionEncoder",
"OculusProjector",
"OculusConfig",
"OculusProcessor",
]
# Version string for this module. It is not listed in __all__, so a star-import
# does not bring it in; read it as an attribute of the module instead.
__version__ = "0.2.0"
|