from ovis_image.model.args import OvisImageModelArgs
from ovis_image.model.autoencoder import AutoEncoderParams
from ovis_image.model.model import OvisImageModel

# Names exported by `from <this package> import *`.
__all__ = [
    "OvisImageModelArgs",
    "OvisImageModel",
    "ovis_image_configs",
]

# Registry of named model presets: maps a preset name to the
# OvisImageModelArgs instance describing that model variant.
ovis_image_configs = {
    "ovis-image-7b": OvisImageModelArgs(
        in_channels=64,
        out_channels=64,
        context_in_dim=2048,
        hidden_size=3072,
        mlp_ratio=4.0,
        num_heads=24,
        depth=6,
        double_block_type="DoubleStreamBlock",
        depth_single_blocks=27,
        # 16 + 56 + 56 = 128 = hidden_size / num_heads (3072 / 24).
        # NOTE(review): presumably the per-head dim split across positional
        # axes -- confirm against the model implementation.
        axes_dim=(16, 56, 56),
        theta=10_000,
        qkv_bias=True,
        activation="swiglu",
        # NOTE(review): scale_factor / shift_factor look like latent
        # normalization constants -- confirm against the autoencoder module.
        autoencoder_params=AutoEncoderParams(
            resolution=256,
            in_channels=3,
            ch=128,
            out_ch=3,
            ch_mult=(1, 2, 4, 4),
            num_res_blocks=2,
            z_channels=16,
            scale_factor=0.3611,
            shift_factor=0.1159,
        ),
    ),
}