Add vision_video.mlmodelc (Gemma 4 video encoder, 64 tokens/frame)
Browse files
vision_video.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc1238423fab6eb9d256112410618e6a238e91bb268c5826c05db291b4294f59
|
| 3 |
+
size 243
|
vision_video.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:964fb81d893883036b585f83a962a2db6ce39eaf23c7b418868e6c928c5d0a27
|
| 3 |
+
size 418
|
vision_video.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[]",
|
| 13 |
+
"name" : "image_features",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"modelParameters" : [
|
| 18 |
+
|
| 19 |
+
],
|
| 20 |
+
"specificationVersion" : 9,
|
| 21 |
+
"mlProgramOperationTypeHistogram" : {
|
| 22 |
+
"Ios18.expandDims" : 135,
|
| 23 |
+
"Ios18.mul" : 423,
|
| 24 |
+
"Ios18.cos" : 2,
|
| 25 |
+
"Ios18.softmax" : 16,
|
| 26 |
+
"Ios18.matmul" : 36,
|
| 27 |
+
"Ios18.floorDiv" : 2,
|
| 28 |
+
"Ios16.reduceMean" : 113,
|
| 29 |
+
"Ios18.logicalNot" : 2,
|
| 30 |
+
"Ios18.equal" : 2,
|
| 31 |
+
"Ios18.sin" : 2,
|
| 32 |
+
"Split" : 96,
|
| 33 |
+
"Select" : 3,
|
| 34 |
+
"Ios18.greaterEqual" : 1,
|
| 35 |
+
"Ios16.reduceMax" : 1,
|
| 36 |
+
"Ios16.reduceMin" : 2,
|
| 37 |
+
"Ios18.add" : 229,
|
| 38 |
+
"Ios16.reduceSum" : 1,
|
| 39 |
+
"Tile" : 1,
|
| 40 |
+
"Ios18.reshape" : 64,
|
| 41 |
+
"Ios18.maximum" : 1,
|
| 42 |
+
"Ios18.linear" : 114,
|
| 43 |
+
"Ios18.concat" : 100,
|
| 44 |
+
"Ios18.transpose" : 67,
|
| 45 |
+
"OneHot" : 2,
|
| 46 |
+
"Ios18.sub" : 2,
|
| 47 |
+
"Ios18.cast" : 16,
|
| 48 |
+
"Ios18.pow" : 226,
|
| 49 |
+
"Ios18.clip" : 176,
|
| 50 |
+
"Ios18.gelu" : 16,
|
| 51 |
+
"Ios18.gatherNd" : 1,
|
| 52 |
+
"Ios18.sliceByIndex" : 133,
|
| 53 |
+
"NonZero" : 1
|
| 54 |
+
},
|
| 55 |
+
"computePrecision" : "Mixed (Float16, Int16, Int32)",
|
| 56 |
+
"isUpdatable" : "0",
|
| 57 |
+
"stateSchema" : [
|
| 58 |
+
|
| 59 |
+
],
|
| 60 |
+
"availability" : {
|
| 61 |
+
"macOS" : "15.0",
|
| 62 |
+
"tvOS" : "18.0",
|
| 63 |
+
"visionOS" : "2.0",
|
| 64 |
+
"watchOS" : "11.0",
|
| 65 |
+
"iOS" : "18.0",
|
| 66 |
+
"macCatalyst" : "18.0"
|
| 67 |
+
},
|
| 68 |
+
"modelType" : {
|
| 69 |
+
"name" : "MLModelType_mlProgram"
|
| 70 |
+
},
|
| 71 |
+
"userDefinedMetadata" : {
|
| 72 |
+
"com.github.apple.coremltools.conversion_date" : "2026-04-15",
|
| 73 |
+
"com.github.apple.coremltools.source" : "torch==2.7.0",
|
| 74 |
+
"com.github.apple.coremltools.version" : "9.0",
|
| 75 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 76 |
+
},
|
| 77 |
+
"inputSchema" : [
|
| 78 |
+
{
|
| 79 |
+
"hasShapeFlexibility" : "0",
|
| 80 |
+
"isOptional" : "0",
|
| 81 |
+
"dataType" : "Float32",
|
| 82 |
+
"formattedType" : "MultiArray (Float32 1 × 630 × 768)",
|
| 83 |
+
"shortDescription" : "",
|
| 84 |
+
"shape" : "[1, 630, 768]",
|
| 85 |
+
"name" : "pixel_values",
|
| 86 |
+
"type" : "MultiArray"
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"hasShapeFlexibility" : "0",
|
| 90 |
+
"isOptional" : "0",
|
| 91 |
+
"dataType" : "Int32",
|
| 92 |
+
"formattedType" : "MultiArray (Int32 1 × 630 × 2)",
|
| 93 |
+
"shortDescription" : "",
|
| 94 |
+
"shape" : "[1, 630, 2]",
|
| 95 |
+
"name" : "pixel_position_ids",
|
| 96 |
+
"type" : "MultiArray"
|
| 97 |
+
}
|
| 98 |
+
],
|
| 99 |
+
"generatedClassName" : "vision_video",
|
| 100 |
+
"method" : "predict"
|
| 101 |
+
}
|
| 102 |
+
]
|
vision_video.mlmodelc/model.mil
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vision_video.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a58aad4747e2c5269fbcae1b80ac3342bada076223052e2f093d344ccce4309
|
| 3 |
+
size 338081024
|