Update image_processing_gemma3_tiled.py
Browse files
image_processing_gemma3_tiled.py
CHANGED
|
@@ -172,6 +172,7 @@ class Gemma3TiledImageProcessor(BaseImageProcessor):
|
|
| 172 |
"""
|
| 173 |
|
| 174 |
model_input_names = ["pixel_values", "tile_grid_shape", "num_crops"]
|
|
|
|
| 175 |
|
| 176 |
def __init__(
|
| 177 |
self,
|
|
@@ -291,15 +292,20 @@ class Gemma3TiledImageProcessor(BaseImageProcessor):
|
|
| 291 |
# num_crops is 0 for each image since we use tiling, not pan-and-scan
|
| 292 |
num_crops = [0] * len(all_pixel_values)
|
| 293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
data = {
|
| 295 |
-
"pixel_values":
|
| 296 |
"tile_grid_shape": all_grid_shapes,
|
| 297 |
"num_crops": num_crops,
|
| 298 |
}
|
| 299 |
|
| 300 |
-
|
| 301 |
-
# (different images have different tile counts). Let the model handle it.
|
| 302 |
-
return BatchFeature(data=data, tensor_type=None)
|
| 303 |
|
| 304 |
|
| 305 |
__all__ = ["Gemma3TiledImageProcessor", "calculate_tile_grid", "tile_image"]
|
|
|
|
| 172 |
"""
|
| 173 |
|
| 174 |
model_input_names = ["pixel_values", "tile_grid_shape", "num_crops"]
|
| 175 |
+
_auto_class = "AutoImageProcessor" # Required for auto_map in preprocessor_config.json
|
| 176 |
|
| 177 |
def __init__(
|
| 178 |
self,
|
|
|
|
| 292 |
# num_crops is 0 for each image since we use tiling, not pan-and-scan
|
| 293 |
num_crops = [0] * len(all_pixel_values)
|
| 294 |
|
| 295 |
+
# Concatenate all tiles into a single array for vLLM compatibility
|
| 296 |
+
# vLLM's flat_from_sizes expects a single tensor, not a list
|
| 297 |
+
if len(all_pixel_values) > 0:
|
| 298 |
+
concatenated_pixels = np.concatenate(all_pixel_values, axis=0)
|
| 299 |
+
else:
|
| 300 |
+
concatenated_pixels = np.array([])
|
| 301 |
+
|
| 302 |
data = {
|
| 303 |
+
"pixel_values": concatenated_pixels,
|
| 304 |
"tile_grid_shape": all_grid_shapes,
|
| 305 |
"num_crops": num_crops,
|
| 306 |
}
|
| 307 |
|
| 308 |
+
return BatchFeature(data=data, tensor_type=return_tensors)
|
|
|
|
|
|
|
| 309 |
|
| 310 |
|
| 311 |
__all__ = ["Gemma3TiledImageProcessor", "calculate_tile_grid", "tile_image"]
|