mlboydaisuke
/

coreml-zoo

Model card Files Files and versions

xet

Community

mlboydaisuke commited on Apr 16

Commit

7f94c79

verified ·

1 Parent(s): faa0b7b

Upload models.json with huggingface_hub

Browse files

Files changed (1) hide show

models.json +465 -627

models.json CHANGED Viewed

@@ -3,458 +3,248 @@
   "updated_at": "2026-04-10",
   "min_app_version": "1.0",
   "categories": [
-    {
-      "id": "llm",
-      "name": "Large Language Models",
-      "icon": "bubble.left.and.text.bubble.right",
-      "order": 0
-    },
-    {
-      "id": "segmentation",
-      "name": "Segmentation",
-      "icon": "person.and.background.dotted",
-      "order": 1
-    },
-    {
-      "id": "enhancement",
-      "name": "Image Enhancement",
-      "icon": "wand.and.stars",
-      "order": 2
-    },
-    {
-      "id": "detection",
-      "name": "Object Detection",
-      "icon": "viewfinder",
-      "order": 3
-    },
-    {
-      "id": "depth",
-      "name": "Depth & Geometry",
-      "icon": "cube.transparent",
-      "order": 4
-    },
-    {
-      "id": "vision_language",
-      "name": "Vision-Language",
-      "icon": "text.viewfinder",
-      "order": 5
-    },
-    {
-      "id": "face",
-      "name": "Face Processing",
-      "icon": "face.smiling",
-      "order": 6
-    },
-    {
-      "id": "generation",
-      "name": "Image Generation",
-      "icon": "sparkles",
-      "order": 7
-    },
-    {
-      "id": "video",
-      "name": "Video Processing",
-      "icon": "film",
-      "order": 8
-    },
-    {
-      "id": "audio",
-      "name": "Audio Processing",
-      "icon": "waveform.circle",
-      "order": 9
-    },
-    {
-      "id": "speech",
-      "name": "Speech & Music",
-      "icon": "music.note",
-      "order": 10
-    }
   ],
   "models": [
-    {
-      "id": "gemma4_e2b",
-      "name": "Gemma 4 E2B",
-      "subtitle": "Google DeepMind, 2025",
-      "category_id": "llm",
-      "description_md": "Google's latest on-device multimodal LLM. 2.3B effective parameters with Per-Layer Embeddings. Text + image input, streaming text output. Runs on Apple Neural Engine at ~31 tok/s decode. Supports multi-turn conversations, image understanding, and reasoning.",
-      "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/gemma4.jpg",
-      "demo": {
-        "template": "chat",
-        "config": {
-          "max_tokens": 1024,
-          "multimodal": true
-        }
-      },
-      "files": [
-        {
-          "name": "gemma4-e2b-coreml.zip",
-          "url": "https://huggingface.co/mlboydaisuke/gemma-4-E2B-coreml/resolve/main/gemma4-e2b-coreml.zip",
-          "archive": "zip",
-          "size_bytes": 2700000000,
-          "sha256": "TODO",
-          "compute_units": "cpuAndNeuralEngine",
-          "kind": "model"
-        }
-      ],
-      "requirements": {
-        "min_ios": "18.0",
-        "min_ram_mb": 1500,
-        "device_capabilities": [
-          "arm64"
-        ]
-      },
-      "license": {
-        "name": "Gemma",
-        "url": "https://ai.google.dev/gemma/terms"
-      },
-      "upstream": {
-        "name": "google/gemma-4-e2b",
-        "url": "https://huggingface.co/google/gemma-4-e2b",
-        "year": 2025
-      }
-    },
     {
       "id": "rmbg_1_4",
       "name": "RMBG-1.4",
       "subtitle": "BRIA AI, 2023",
       "category_id": "segmentation",
-      "description_md": "High-quality background removal. Outputs foreground with alpha mask. 1024×1024 input.",
       "demo": {
         "template": "image_in_out",
-        "config": {
-          "input_size": 1024,
-          "output_type": "mask"
-        }
       },
       "files": [
         {
           "name": "RMBG_1_4.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/rmbg/RMBG_1_4.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 38771210,
-          "sha256": "a80dbb5f04c922a8fa698c38592e4e52af4e62471d70bc7c59c28a3355a1da95",
-          "compute_units": "cpuOnly",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 300
-      },
-      "license": {
-        "name": "Apache-2.0",
-        "url": "https://huggingface.co/briaai/RMBG-1.4"
-      },
-      "upstream": {
-        "name": "briaai/RMBG-1.4",
-        "url": "https://huggingface.co/briaai/RMBG-1.4",
-        "year": 2023
-      }
     },
     {
       "id": "ddcolor",
-      "name": "DDColor Tiny",
       "subtitle": "Image Colorization, 2023",
       "category_id": "enhancement",
-      "description_md": "Automatic grayscale image colorization via dual decoders. 512×512 input.",
       "demo": {
         "template": "image_in_out",
-        "config": {
-          "input_size": 512,
-          "output_type": "lab_ab"
-        }
       },
       "files": [
         {
-          "name": "DDColor_Tiny.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/ddcolor/DDColor_Tiny.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 212344570,
-          "sha256": "bfecea37d66005f602efe13978360b8e4707923234c3d1d00beeb4e36cb1b02c",
           "compute_units": "all",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 400
-      },
-      "license": {
-        "name": "Apache-2.0",
-        "url": "https://github.com/piddnad/DDColor"
-      },
-      "upstream": {
-        "name": "piddnad/DDColor",
-        "url": "https://github.com/piddnad/DDColor",
-        "year": 2023
-      }
     },
     {
       "id": "sinsr",
       "name": "SinSR",
       "subtitle": "Single-Step Super-Resolution, 2024",
       "category_id": "enhancement",
-      "description_md": "4× super-resolution via single-step diffusion. 256→1024. Swin Transformer denoiser (FP32).",
       "demo": {
         "template": "image_in_out",
-        "config": {
-          "input_size": 256,
-          "output_type": "sinsr"
-        }
       },
       "files": [
         {
           "name": "SinSR_Encoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/sinsr/SinSR_Encoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 41246338,
-          "sha256": "fdec09d17561ec1bb5a2e829683d48c2b45e76b876285619a6e29a3523b8b7e2",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "SinSR_Denoiser.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/sinsr/SinSR_Denoiser.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 440014511,
-          "sha256": "b31374c2d539b2cdd81499d6062c801ca00e405f5a67507cd609d14e2d6d4beb",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "SinSR_Decoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/sinsr/SinSR_Decoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 60880285,
-          "sha256": "b8b9a7b52d6b240cf9fb3352b286ea83eb984fd73f5dd81c9f034f0016a5cb8c",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 600
-      },
-      "license": {
-        "name": "Apache-2.0",
-        "url": "https://github.com/wyf0912/SinSR"
-      },
-      "upstream": {
-        "name": "wyf0912/SinSR",
-        "url": "https://github.com/wyf0912/SinSR",
-        "year": 2024
-      }
     },
     {
-      "id": "yolo26s",
-      "name": "YOLO26s",
-      "subtitle": "NMS-Free Detection, 2026",
-      "category_id": "detection",
-      "description_md": "NMS-free object detection. 640×640 input, 80 COCO classes.",
       "demo": {
-        "template": "image_detection",
-        "config": {
-          "input_size": 640,
-          "confidence_threshold": 0.25
-        }
       },
       "files": [
         {
-          "name": "yolo26s.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/yolo26/yolo26s.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 17697581,
-          "sha256": "0ec02fb0cf2dbd6e09601cbbc00a9734156ea4c2a52b0da23a984337074c6fd4",
           "compute_units": "all",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 300
-      },
-      "license": {
-        "name": "AGPL-3.0",
-        "url": "https://github.com/ultralytics/ultralytics"
-      },
-      "upstream": {
-        "name": "ultralytics/ultralytics",
-        "url": "https://github.com/ultralytics/ultralytics",
-        "year": 2026
-      }
     },
     {
-      "id": "yolo11s",
-      "name": "YOLO11s",
-      "subtitle": "Object Detection, 2024",
       "category_id": "detection",
-      "description_md": "YOLO11 small detection with Vision framework NMS. 640×640 input.",
       "demo": {
         "template": "image_detection",
-        "config": {
-          "input_size": 640,
-          "confidence_threshold": 0.25
-        }
       },
       "files": [
         {
-          "name": "yolo11s.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/yolov9/yolo11s.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 17580204,
-          "sha256": "79e82aacc3ad20fc1eb990df6979fae9b927d4b06f33bd20ec0e1c0dcb7d1f6b",
           "compute_units": "all",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 300
-      },
-      "license": {
-        "name": "AGPL-3.0",
-        "url": "https://github.com/ultralytics/ultralytics"
-      },
-      "upstream": {
-        "name": "ultralytics/ultralytics",
-        "url": "https://github.com/ultralytics/ultralytics",
-        "year": 2024
-      }
     },
     {
-      "id": "yolov10n",
-      "name": "YOLOv10n",
       "subtitle": "Object Detection, 2024",
       "category_id": "detection",
-      "description_md": "YOLOv10 nano. 640×640 input. Dual-assignment strategy.",
       "demo": {
         "template": "image_detection",
-        "config": {
-          "input_size": 640,
-          "confidence_threshold": 0.25
-        }
       },
       "files": [
         {
-          "name": "YOLOv10N.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/yolov10/YOLOv10N.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 4309168,
-          "sha256": "9a687144a6b0b764f508c8f544fe46b6674629b8f09a1e99d8ca69b0be899891",
           "compute_units": "all",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 300
-      },
-      "license": {
-        "name": "AGPL-3.0",
-        "url": "https://github.com/THU-MIG/yolov10"
-      },
-      "upstream": {
-        "name": "THU-MIG/yolov10",
-        "url": "https://github.com/THU-MIG/yolov10",
-        "year": 2024
-      }
     },
     {
-      "id": "yoloworld",
-      "name": "YOLO-World",
-      "subtitle": "Open-Vocabulary Detection, 2024",
       "category_id": "detection",
-      "description_md": "Open-vocabulary detection. Type any text query. YOLO-World V2-S + CLIP ViT-B/32.",
       "demo": {
-        "template": "open_vocab_detection",
-        "config": {
-          "input_size": 640
-        }
       },
       "files": [
         {
-          "name": "yoloworld_detector.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/yoloworld/yoloworld_detector.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 23710620,
-          "sha256": "611d299ae74c83f90a5cc9f4585709859d5db735baa8ade721e0c2d99cd5af92",
           "compute_units": "all",
           "kind": "model"
-        },
-        {
-          "name": "clip_text_encoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/yoloworld/clip_text_encoder.mlpackage.zip",
-          "archive": "zip",
-          "size_bytes": 116681932,
-          "sha256": "45770a743297e8c2a57cc330d4f5c80f47734263680895b33b593b50dd2c382b",
-          "compute_units": "cpuOnly",
-          "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 600
-      },
-      "license": {
-        "name": "GPL-3.0",
-        "url": "https://github.com/AILab-CVC/YOLO-World"
-      },
-      "upstream": {
-        "name": "AILab-CVC/YOLO-World",
-        "url": "https://github.com/AILab-CVC/YOLO-World",
-        "year": 2024
-      }
     },
     {
       "id": "moge2_vitb_normal_504",
       "name": "MoGe-2 ViT-B (504×504)",
       "subtitle": "Microsoft, CVPR 2025",
       "category_id": "depth",
-      "description_md": "Monocular geometry from a single image. Metric depth, surface normals, confidence mask. DINOv2 ViT-B/14 backbone.",
       "demo": {
         "template": "depth_visualization",
         "config": {
           "input_size": 504,
-          "output_keys": [
-            "depth",
-            "normal",
-            "mask",
-            "metric_scale"
-          ],
           "depth_unit": "meters"
         }
       },
       "files": [
         {
           "name": "MoGe2_ViTB_Normal_504.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/moge2/MoGe2_ViTB_Normal_504.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 193312088,
-          "sha256": "f60cfb4804707a489d99e24453188cd31ddcabb299bbf6da4507edc9cecbf9e7",
           "compute_units": "all",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 600
-      },
-      "license": {
-        "name": "MIT",
-        "url": "https://github.com/microsoft/MoGe/blob/main/LICENSE"
-      },
-      "upstream": {
-        "name": "microsoft/MoGe",
-        "url": "https://github.com/microsoft/MoGe",
-        "year": 2025
-      }
     },
     {
       "id": "siglip",
       "name": "SigLIP",
       "subtitle": "Zero-Shot Classification, 2023",
       "category_id": "vision_language",
-      "description_md": "Zero-shot image classification. Dual encoder (image + text). 224×224 input.",
       "demo": {
         "template": "zero_shot_classify",
         "config": {
@@ -469,50 +259,40 @@
       "files": [
         {
           "name": "SigLIP_ImageEncoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/siglip/SigLIP_ImageEncoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 170352400,
-          "sha256": "98f6abf5f4aa145199f4ae22305f9c1d5929eee6b126daad84783b2b2090ee24",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "SigLIP_TextEncoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/siglip/SigLIP_TextEncoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 203975769,
-          "sha256": "9dead2d58705838aef7ad83c3bf4036698c78d872ca1cdd04f2c4a6272009ccf",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "siglip_vocab.json",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/siglip/siglip_vocab.json",
-          "size_bytes": 673754,
-          "sha256": "b94b3a58e04f619936b3890804dff7c478522c07515ff748cf127c5443ee5229",
           "kind": "vocab"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 800
-      },
-      "license": {
-        "name": "Apache-2.0",
-        "url": "https://github.com/google-research/big_vision"
-      },
-      "upstream": {
-        "name": "google-research/big_vision",
-        "url": "https://github.com/google-research/big_vision",
-        "year": 2023
-      }
     },
     {
       "id": "florence2",
       "name": "Florence-2",
       "subtitle": "Microsoft, 2024",
       "category_id": "vision_language",
-      "description_md": "Vision-language captioning, OCR, and VQA. Three-stage encoder-decoder. 768×768 input.",
       "demo": {
         "template": "image_to_text",
         "config": {
@@ -523,132 +303,83 @@
           "decoder": "Florence2Decoder.mlpackage.zip",
           "vocab_file": "florence2_vocab.json",
           "tasks": {
-            "caption": [
-              0,
-              2264,
-              473,
-              5,
-              2274,
-              6190,
-              116,
-              2
-            ],
-            "detailed_caption": [
-              0,
-              2264,
-              473,
-              5,
-              31962,
-              2274,
-              6190,
-              116,
-              2
-            ],
-            "ocr": [
-              0,
-              2264,
-              473,
-              5,
-              71307,
-              116,
-              2
-            ]
           }
         }
       },
       "files": [
         {
           "name": "Florence2VisionEncoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/florence2/Florence2VisionEncoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 81198683,
-          "sha256": "9422f189c21220a0f9966eb9d780856772feb55597dcc579fc4e3c88990d0046",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "Florence2TextEncoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/florence2/Florence2TextEncoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 72742890,
-          "sha256": "f985deeef0408ea8aac33ac4f5c6d9635cd9c64c98b53f85031db6e27f3bfd92",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "Florence2Decoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/florence2/Florence2Decoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 85329746,
-          "sha256": "fe85a6faab5281272bcd79dabfbf87d60ba1a78dd9455e2bf71c67a134d61dc5",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "florence2_vocab.json",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/florence2/florence2_vocab.json",
-          "size_bytes": 999352,
-          "sha256": "861fee9af5520403f6dbb4940d6af6627f1481b71cdc4a870f1f61344e57e645",
           "kind": "vocab"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 1200
-      },
-      "license": {
-        "name": "MIT",
-        "url": "https://huggingface.co/microsoft/Florence-2-base"
-      },
-      "upstream": {
-        "name": "microsoft/Florence-2",
-        "url": "https://huggingface.co/microsoft/Florence-2-base",
-        "year": 2024
-      }
     },
     {
-      "id": "face3d",
-      "name": "3DDFA V2",
-      "subtitle": "3D Face Reconstruction, 2020",
       "category_id": "face",
-      "description_md": "Single-image 3D face reconstruction. Predicts 6 DoF pose + expression parameters.",
       "demo": {
-        "template": "face_3d",
-        "config": {
-          "input_size": 120
-        }
       },
       "files": [
         {
-          "name": "3DDFA_V2.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/face3d/3DDFA_V2.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 6083375,
-          "sha256": "0f715dc220c046f558e3b8fc65246df9a2eec77182830a16628783430cdacdc8",
           "compute_units": "all",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 200
-      },
-      "license": {
-        "name": "MIT",
-        "url": "https://github.com/cleardusk/3DDFA_V2"
-      },
-      "upstream": {
-        "name": "cleardusk/3DDFA_V2",
-        "url": "https://github.com/cleardusk/3DDFA_V2",
-        "year": 2020
-      }
     },
     {
       "id": "hypersd",
       "name": "Hyper-SD (1-Step)",
       "subtitle": "ByteDance, 2024",
       "category_id": "generation",
-      "description_md": "Single-step text-to-image from SD1.5 via TCD distillation. 512×512. Chunked UNet (6-bit).",
       "demo": {
         "template": "text_to_image",
         "config": {
@@ -668,277 +399,226 @@
       "files": [
         {
           "name": "HyperSDTextEncoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/hypersd/HyperSDTextEncoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 226397794,
-          "sha256": "201b0fcc3573811aac6a4e8545c695bc4fb2f7710ea0d60c227919d87b37687e",
-          "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "HyperSDUnetChunk1.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/hypersd/HyperSDUnetChunk1.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 324819653,
-          "sha256": "279da11b8231aeeb9045f6ceabebb3a68c20a1b86ecc81aa6914b77ce76d5203",
           "compute_units": "cpuAndNeuralEngine",
           "kind": "model"
         },
         {
           "name": "HyperSDUnetChunk2.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/hypersd/HyperSDUnetChunk2.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 304530429,
-          "sha256": "0a700d11a105da589bb3e5666e38b9c72fa283149951b253fc11722e70e72faa",
           "compute_units": "cpuAndNeuralEngine",
           "kind": "model"
         },
         {
           "name": "HyperSDVAEDecoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/hypersd/HyperSDVAEDecoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 91282754,
-          "sha256": "1260371542d845a2261ed2de36c5fe3e9ccb740a6ceb59b1990705d125e8cf66",
-          "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "vocab.json",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/hypersd/vocab.json",
-          "size_bytes": 1059962,
-          "sha256": "e089ad92ba36837a0d31433e555c8f45fe601ab5c221d4f607ded32d9f7a4349",
           "kind": "vocab"
         },
         {
           "name": "merges.txt",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/hypersd/merges.txt",
-          "size_bytes": 524619,
-          "sha256": "9fd691f7c8039210e0fced15865466c65820d09b63988b0174bfe25de299051a",
           "kind": "vocab"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 1000
-      },
-      "license": {
-        "name": "OpenRAIL-M",
-        "url": "https://huggingface.co/ByteDance/Hyper-SD"
-      },
-      "upstream": {
-        "name": "ByteDance/Hyper-SD",
-        "url": "https://huggingface.co/ByteDance/Hyper-SD",
-        "year": 2024
-      }
     },
     {
       "id": "matanyone",
       "name": "MatAnyone",
       "subtitle": "Video Matting, 2025",
       "category_id": "video",
-      "description_md": "Temporally consistent video matting. 5-model pipeline with memory propagation.",
       "demo": {
         "template": "video_matting",
         "config": {
           "frame_size": 512,
-          "encoder": "MatAnyone_encoder.mlpackage.zip",
-          "mask_encoder": "MatAnyone_mask_encoder.mlpackage.zip",
-          "read_first": "MatAnyone_read_first.mlpackage.zip",
-          "read": "MatAnyone_read.mlpackage.zip",
-          "decoder": "MatAnyone_decoder.mlpackage.zip"
         }
       },
       "files": [
         {
-          "name": "MatAnyone_encoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/matanyone/MatAnyone_encoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 17306121,
-          "sha256": "97ffd6bc4611f9a3351dc890fc00954ba48171e517e66a39f7a5f1f38110dfda",
-          "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
-          "name": "MatAnyone_mask_encoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/matanyone/MatAnyone_mask_encoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 16819866,
-          "sha256": "ba67559188ffc64d8e46418c051c6a55815d4482def17519fa518daac7d5a911",
-          "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
-          "name": "MatAnyone_read_first.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/matanyone/MatAnyone_read_first.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 21991849,
-          "sha256": "34daf7227dbcec7373a3fef175259fa7ec631ed8cb91d5595ca57ee9b22df7bb",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
-          "name": "MatAnyone_read.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/matanyone/MatAnyone_read.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 22135429,
-          "sha256": "052e52c0ffb7ff9ede448128950cd4c1c9a96589b6900c82b5104d99addb7fa5",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
-          "name": "MatAnyone_decoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/matanyone/MatAnyone_decoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 8807630,
-          "sha256": "67136aa67000e604838fe9aa7de151c514ef84f0b83f1da0f043cf70652d28eb",
-          "compute_units": "cpuAndGPU",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 800
-      },
-      "license": {
-        "name": "MIT",
-        "url": "https://github.com/pq-yang/MatAnyone"
-      },
-      "upstream": {
-        "name": "pq-yang/MatAnyone",
-        "url": "https://github.com/pq-yang/MatAnyone",
-        "year": 2025
-      }
     },
     {
       "id": "demucs",
       "name": "HTDemucs",
       "subtitle": "Audio Source Separation",
       "category_id": "audio",
-      "description_md": "Split music into 4 stems: drums, bass, vocals, other. 44.1 kHz stereo, FP32.",
       "demo": {
         "template": "audio_in_out",
         "config": {
           "sample_rate": 44100,
           "segment_length": 343980,
-          "output_stems": [
-            "drums",
-            "bass",
-            "vocals",
-            "other"
-          ]
         }
       },
       "files": [
         {
           "name": "HTDemucs_SourceSeparation_F32.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/demucs/HTDemucs_SourceSeparation_F32.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 79076395,
-          "sha256": "0fbb941e15a5b2fa425d14fe630ed4c14b6dee72780c1f5b2b05f58803bce5f7",
           "compute_units": "cpuOnly",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 1000
-      },
-      "license": {
-        "name": "MIT",
-        "url": "https://github.com/adefossez/demucs"
-      },
-      "upstream": {
-        "name": "adefossez/demucs",
-        "url": "https://github.com/adefossez/demucs",
-        "year": 2021
-      }
     },
     {
       "id": "kokoro",
       "name": "Kokoro-82M",
       "subtitle": "Multilingual TTS",
       "category_id": "speech",
-      "description_md": "English + Japanese text-to-speech. 24 kHz. StyleTTS2 + iSTFTNet vocoder. Multiple voices.",
       "demo": {
         "template": "text_to_audio",
         "config": {
           "mode": "tts",
           "sample_rate": 24000,
           "vocab_file": "kokoro_vocab.json",
-          "voices": [
-            "af_heart",
-            "af_bella",
-            "am_michael",
-            "bf_emma",
-            "bm_george"
-          ]
         }
       },
       "files": [
         {
           "name": "Kokoro_Predictor.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/kokoro/Kokoro_Predictor.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 72191470,
-          "sha256": "af1d55dc842980c32b5591a70f603941f11ab60a435bed0c13a107a8ef467bed",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "Kokoro_Decoder_128.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/kokoro/Kokoro_Decoder_128.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 229120589,
-          "sha256": "cece0d072f5ba6aa3f729cf4c76b4de51823bcc65a26ab363c10441c3cd8b306",
           "compute_units": "all",
           "kind": "model"
         },
         {
           "name": "Kokoro_Decoder_256.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/kokoro/Kokoro_Decoder_256.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 229123438,
-          "sha256": "36d5e16d5c5ccb500fc96f1b07a1d5ac57b791f8e09e61b78319d76949003efe",
           "compute_units": "all",
           "kind": "model"
         },
         {
           "name": "Kokoro_Decoder_512.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/kokoro/Kokoro_Decoder_512.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 229128735,
-          "sha256": "0a44484c327e4fe8443b0bcf104d6964fe3f30d628c9e78aee3f31af7f2475dc",
           "compute_units": "all",
           "kind": "model"
         },
         {
           "name": "kokoro_vocab.json",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/kokoro/kokoro_vocab.json",
-          "size_bytes": 1144,
-          "sha256": "70abefbe8a1c8865e43e0a43bbdc25b91a33e4aa053479d443ccf23e20a59e5d",
           "kind": "vocab"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 1000
-      },
-      "license": {
-        "name": "Apache-2.0",
-        "url": "https://huggingface.co/hexgrad/Kokoro-82M"
-      },
-      "upstream": {
-        "name": "hexgrad/Kokoro-82M",
-        "url": "https://huggingface.co/hexgrad/Kokoro-82M",
-        "year": 2024
-      }
     },
     {
       "id": "stable_audio",
       "name": "Stable Audio Open",
       "subtitle": "Text-to-Music, 2024",
       "category_id": "speech",
-      "description_md": "Text-to-music. Up to 11.9s stereo 44.1 kHz. Rectified flow DiT + T5 + Oobleck VAE.",
       "demo": {
         "template": "text_to_audio",
         "config": {
@@ -950,150 +630,308 @@
       "files": [
         {
           "name": "StableAudioT5Encoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/stableaudio/StableAudioT5Encoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 98538259,
-          "sha256": "319a8ba775d309240253ced68a03a3923d0aec9a79f608044f9403bdcfe4b741",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "StableAudioNumberEmbedder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/stableaudio/StableAudioNumberEmbedder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 376018,
-          "sha256": "04bdc5de00a2cf1c4a18f80c94f0d74ecfab41f3ad99f2fb7a031d6ff5af75da",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "StableAudioDiT.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/stableaudio/StableAudioDiT.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 1265748504,
-          "sha256": "b17da4fc4df857821d39dbdf7d3bfe7062a2272ab3e5df1284d545afb54047e4",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "StableAudioVAEDecoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/stableaudio/StableAudioVAEDecoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 144960275,
-          "sha256": "7207544cca9799cc1d6803c5e81badd0bb4527b2d3a64d5cab5700a5f19a9374",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "t5_vocab.json",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/stableaudio/t5_vocab.json",
-          "size_bytes": 749757,
-          "sha256": "7c9ff3ac1b3dbcaa617ee659f2df68688cfd44f1a5eb3be3fa0a2f56c749d56a",
           "kind": "vocab"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 1200
       },
-      "license": {
-        "name": "custom",
-        "url": "https://huggingface.co/stabilityai/stable-audio-open-small"
       },
-      "upstream": {
-        "name": "stabilityai/stable-audio-open-small",
-        "url": "https://huggingface.co/stabilityai/stable-audio-open-small",
-        "year": 2024
-      }
     },
     {
       "id": "openvoice",
       "name": "OpenVoice V2",
       "subtitle": "Voice Cloning",
       "category_id": "audio",
-      "description_md": "Zero-shot voice conversion. Clone a speaker from ~10s reference audio.",
       "demo": {
         "template": "audio_in_out",
         "config": {
           "sample_rate": 22050,
-          "output_stems": [
-            "converted"
-          ]
         }
       },
       "files": [
         {
           "name": "OpenVoice_SpeakerEncoder.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/openvoice/OpenVoice_SpeakerEncoder.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 1519880,
-          "sha256": "c3f2a96aaf5ecb5c5afc62b3d3dfbd47dc7ae64bc3edb7aa68befb54aef74459",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "OpenVoice_VoiceConverter.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/openvoice/OpenVoice_VoiceConverter.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 59799630,
-          "sha256": "ef3ce8a2d1564aefa13830d7d0ca43f85e0aa62d5f59622c8bc456c307ab5e05",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 500
       },
-      "license": {
-        "name": "MIT",
-        "url": "https://github.com/myshell-ai/OpenVoice"
       },
-      "upstream": {
-        "name": "myshell-ai/OpenVoice",
-        "url": "https://github.com/myshell-ai/OpenVoice",
-        "year": 2023
-      }
     },
     {
-      "id": "diarization",
-      "name": "Pyannote Diarization",
-      "subtitle": "Speaker Identification",
-      "category_id": "audio",
-      "description_md": "Speaker diarization: who spoke when. 16 kHz mono, 10s segments.",
       "demo": {
-        "template": "audio_in_out",
         "config": {
-          "sample_rate": 16000,
-          "output_stems": [
-            "speaker_timeline"
-          ]
         }
       },
       "files": [
         {
-          "name": "SpeakerSegmentation.mlpackage.zip",
-          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/diarization/SpeakerSegmentation.mlpackage.zip",
           "archive": "zip",
-          "size_bytes": 5327137,
-          "sha256": "dcfa2b98900f2b99029abfb593644b70418186a6ec2e94c9a79c2b3d7a84378a",
-          "compute_units": "cpuAndGPU",
           "kind": "model"
         }
       ],
-      "requirements": {
-        "min_ios": "17.0",
-        "min_ram_mb": 200
       },
-      "license": {
-        "name": "MIT",
-        "url": "https://github.com/pyannote/pyannote-audio"
       },
-      "upstream": {
-        "name": "pyannote/pyannote-audio",
-        "url": "https://github.com/pyannote/pyannote-audio",
-        "year": 2021
-      }
     }
   ]
-}

   "updated_at": "2026-04-10",
   "min_app_version": "1.0",
   "categories": [
+    { "id": "segmentation", "name": "Segmentation", "icon": "person.and.background.dotted", "order": 1 },
+    { "id": "enhancement", "name": "Image Enhancement", "icon": "wand.and.stars", "order": 2 },
+    { "id": "detection", "name": "Object Detection", "icon": "viewfinder", "order": 3 },
+    { "id": "depth", "name": "Depth & Geometry", "icon": "cube.transparent", "order": 4 },
+    { "id": "vision_language", "name": "Vision-Language", "icon": "text.viewfinder", "order": 5 },
+    { "id": "face", "name": "Face Processing", "icon": "face.smiling", "order": 6 },
+    { "id": "generation", "name": "Image Generation", "icon": "sparkles", "order": 7 },
+    { "id": "video", "name": "Video Processing", "icon": "film", "order": 8 },
+    { "id": "audio", "name": "Audio Processing", "icon": "waveform.circle", "order": 9 },
+    { "id": "speech", "name": "Speech & Music", "icon": "music.note", "order": 10 },
+    { "id": "inpainting", "name": "Inpainting", "icon": "eraser", "order": 11 },
+    { "id": "restoration", "name": "Face Restoration", "icon": "face.smiling.inverse", "order": 12 }
   ],
   "models": [
     {
       "id": "rmbg_1_4",
       "name": "RMBG-1.4",
       "subtitle": "BRIA AI, 2023",
       "category_id": "segmentation",
+      "description_md": "High-quality background removal. Outputs foreground with alpha mask. INT8 quantized U-Net, 1024×1024 input.",
       "demo": {
         "template": "image_in_out",
+        "config": { "input_size": 1024, "output_type": "mask" }
       },
       "files": [
         {
           "name": "RMBG_1_4.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 50000000,
+          "sha256": "TODO",
+          "compute_units": "cpuAndGPU",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 300 },
+      "license": { "name": "Apache-2.0", "url": "https://huggingface.co/briaai/RMBG-1.4" },
+      "upstream": { "name": "briaai/RMBG-1.4", "url": "https://huggingface.co/briaai/RMBG-1.4", "year": 2023 }
     },
     {
       "id": "ddcolor",
+      "name": "DDColor",
       "subtitle": "Image Colorization, 2023",
       "category_id": "enhancement",
+      "description_md": "Automatic grayscale image colorization via dual decoders. 512×512 input, processes in LAB color space.",
       "demo": {
         "template": "image_in_out",
+        "config": { "input_size": 512, "output_type": "image" }
       },
       "files": [
         {
+          "name": "DDColor.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 35000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 400 },
+      "license": { "name": "Apache-2.0", "url": "https://github.com/piddnad/DDColor" },
+      "upstream": { "name": "piddnad/DDColor", "url": "https://github.com/piddnad/DDColor", "year": 2023 }
     },
     {
       "id": "sinsr",
       "name": "SinSR",
       "subtitle": "Single-Step Super-Resolution, 2024",
       "category_id": "enhancement",
+      "description_md": "4× super-resolution via single-step diffusion. 256×256 input → 1024×1024 output. Swin Transformer denoiser (FP32 required).",
       "demo": {
         "template": "image_in_out",
+        "config": { "input_size": 256, "output_type": "image" }
       },
       "files": [
         {
           "name": "SinSR_Encoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 40000000,
+          "sha256": "TODO",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "SinSR_Denoiser.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 440000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "SinSR_Decoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 60000000,
+          "sha256": "TODO",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 600 },
+      "license": { "name": "Apache-2.0", "url": "https://github.com/wyf0912/SinSR" },
+      "upstream": { "name": "wyf0912/SinSR", "url": "https://github.com/wyf0912/SinSR", "year": 2024 }
     },
     {
+      "id": "efficientad",
+      "name": "EfficientAD",
+      "subtitle": "Anomaly Detection, 2023",
+      "category_id": "segmentation",
+      "description_md": "Lightweight unsupervised anomaly detection. 256×256 input → anomaly heatmap + score. Industrial quality inspection.",
       "demo": {
+        "template": "image_in_out",
+        "config": { "input_size": 256, "output_type": "image" }
       },
       "files": [
         {
+          "name": "EfficientAD.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 8000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 200 },
+      "license": { "name": "MIT", "url": "https://github.com/nelson1425/EfficientAD" },
+      "upstream": { "name": "nelson1425/EfficientAD", "url": "https://github.com/nelson1425/EfficientAD", "year": 2023 }
     },
     {
+      "id": "yolo26s",
+      "name": "YOLO26s",
+      "subtitle": "NMS-Free Detection, 2026",
       "category_id": "detection",
+      "description_md": "NMS-free object detection. 640×640 input, output [1,300,6]: x1,y1,x2,y2,confidence,class_id. 80 COCO classes.",
       "demo": {
         "template": "image_detection",
+        "config": { "input_size": 640, "confidence_threshold": 0.25 }
       },
       "files": [
         {
+          "name": "yolo26s.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 18000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 300 },
+      "license": { "name": "AGPL-3.0", "url": "https://github.com/ultralytics/ultralytics" },
+      "upstream": { "name": "ultralytics/ultralytics", "url": "https://github.com/ultralytics/ultralytics", "year": 2026 }
     },
     {
+      "id": "yolov9s",
+      "name": "YOLOv9s",
       "subtitle": "Object Detection, 2024",
       "category_id": "detection",
+      "description_md": "YOLOv9 small with Vision framework NMS. 640×640 input. PGI + GELAN architecture.",
       "demo": {
         "template": "image_detection",
+        "config": { "input_size": 640, "confidence_threshold": 0.25 }
       },
       "files": [
         {
+          "name": "yolov9s.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 14000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 300 },
+      "license": { "name": "AGPL-3.0", "url": "https://github.com/WongKinYiu/yolov9" },
+      "upstream": { "name": "WongKinYiu/yolov9", "url": "https://github.com/WongKinYiu/yolov9", "year": 2024 }
     },
     {
+      "id": "yolov10n",
+      "name": "YOLOv10n",
+      "subtitle": "Object Detection, 2024",
       "category_id": "detection",
+      "description_md": "YOLOv10 nano with Vision framework NMS. 640×640 input. Dual-assignment strategy.",
       "demo": {
+        "template": "image_detection",
+        "config": { "input_size": 640, "confidence_threshold": 0.25 }
       },
       "files": [
         {
+          "name": "yolov10n.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 14000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 300 },
+      "license": { "name": "AGPL-3.0", "url": "https://github.com/THU-MIG/yolov10" },
+      "upstream": { "name": "THU-MIG/yolov10", "url": "https://github.com/THU-MIG/yolov10", "year": 2024 }
     },
     {
       "id": "moge2_vitb_normal_504",
       "name": "MoGe-2 ViT-B (504×504)",
       "subtitle": "Microsoft, CVPR 2025",
       "category_id": "depth",
+      "description_md": "Monocular geometry from a single image. Predicts metric depth, surface normals, and a confidence mask in one forward pass. DINOv2 ViT-B/14 backbone.",
       "demo": {
         "template": "depth_visualization",
         "config": {
           "input_size": 504,
+          "output_keys": ["depth", "normal", "mask", "metric_scale"],
           "depth_unit": "meters"
         }
       },
       "files": [
         {
           "name": "MoGe2_ViTB_Normal_504.mlpackage.zip",
+          "url": "https://github.com/john-rocky/CoreML-Models/releases/download/moge2-v1/MoGe2_ViTB_Normal_504.mlpackage.zip",
           "archive": "zip",
+          "size_bytes": 209715200,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 600 },
+      "license": { "name": "MIT", "url": "https://github.com/microsoft/MoGe/blob/main/LICENSE" },
+      "upstream": { "name": "microsoft/MoGe", "url": "https://github.com/microsoft/MoGe", "year": 2025 }
     },
     {
       "id": "siglip",
       "name": "SigLIP",
       "subtitle": "Zero-Shot Classification, 2023",
       "category_id": "vision_language",
+      "description_md": "Zero-shot image classification. Dual encoder (image + text) with sigmoid loss. 224×224 input. Type any class names to classify.",
       "demo": {
         "template": "zero_shot_classify",
         "config": {
       "files": [
         {
           "name": "SigLIP_ImageEncoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 350000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "SigLIP_TextEncoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 350000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "siglip_vocab.json",
+          "url": "TODO",
+          "size_bytes": 1000000,
+          "sha256": "TODO",
           "kind": "vocab"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 800 },
+      "license": { "name": "Apache-2.0", "url": "https://github.com/google-research/big_vision" },
+      "upstream": { "name": "google-research/big_vision", "url": "https://github.com/google-research/big_vision", "year": 2023 }
     },
     {
       "id": "florence2",
       "name": "Florence-2",
       "subtitle": "Microsoft, 2024",
       "category_id": "vision_language",
+      "description_md": "Vision-language captioning, OCR, and visual QA. Three-stage encoder-decoder. 768×768 input, autoregressive text output.",
       "demo": {
         "template": "image_to_text",
         "config": {
           "decoder": "Florence2Decoder.mlpackage.zip",
           "vocab_file": "florence2_vocab.json",
           "tasks": {
+            "caption": [0, 2264, 473, 5, 2274, 6190, 116, 2],
+            "detailed_caption": [0, 2264, 473, 5, 31962, 2274, 6190, 116, 2],
+            "ocr": [0, 2264, 473, 5, 71307, 116, 2]
           }
         }
       },
       "files": [
         {
           "name": "Florence2VisionEncoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 400000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "Florence2TextEncoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 450000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "Florence2Decoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 1400000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "florence2_vocab.json",
+          "url": "TODO",
+          "size_bytes": 500000,
+          "sha256": "TODO",
           "kind": "vocab"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 1200 },
+      "license": { "name": "MIT", "url": "https://huggingface.co/microsoft/Florence-2-base" },
+      "upstream": { "name": "microsoft/Florence-2", "url": "https://huggingface.co/microsoft/Florence-2-base", "year": 2024 }
     },
     {
+      "id": "adaface",
+      "name": "AdaFace",
+      "subtitle": "Face Recognition, 2022",
       "category_id": "face",
+      "description_md": "Face recognition via 512-dim embeddings. IR-18 backbone, 112×112 face crop input. Compare faces by cosine similarity.",
       "demo": {
+        "template": "face_compare",
+        "config": { "input_size": 112, "embedding_dim": 512, "match_threshold": 0.6 }
       },
       "files": [
         {
+          "name": "AdaFace_IR18_CASIA.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 32000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 200 },
+      "license": { "name": "MIT", "url": "https://github.com/mk-minchul/AdaFace" },
+      "upstream": { "name": "mk-minchul/AdaFace", "url": "https://github.com/mk-minchul/AdaFace", "year": 2022 }
     },
     {
       "id": "hypersd",
       "name": "Hyper-SD (1-Step)",
       "subtitle": "ByteDance, 2024",
       "category_id": "generation",
+      "description_md": "Single-step text-to-image from SD1.5 via TCD distillation. 512×512 output. Chunked UNet (6-bit palettized) + TCD scheduler.",
       "demo": {
         "template": "text_to_image",
         "config": {
       "files": [
         {
           "name": "HyperSDTextEncoder.mlpackage.zip",
+          "url": "https://github.com/john-rocky/CoreML-Models/releases/download/hypersd-v1/HyperSDTextEncoder.mlpackage.zip",
           "archive": "zip",
+          "size_bytes": 235000000,
+          "sha256": "TODO",
+          "compute_units": "cpuAndNeuralEngine",
           "kind": "model"
         },
         {
           "name": "HyperSDUnetChunk1.mlpackage.zip",
+          "url": "https://github.com/john-rocky/CoreML-Models/releases/download/hypersd-v1/HyperSDUnetChunk1.mlpackage.zip",
           "archive": "zip",
+          "size_bytes": 318000000,
+          "sha256": "TODO",
           "compute_units": "cpuAndNeuralEngine",
           "kind": "model"
         },
         {
           "name": "HyperSDUnetChunk2.mlpackage.zip",
+          "url": "https://github.com/john-rocky/CoreML-Models/releases/download/hypersd-v1/HyperSDUnetChunk2.mlpackage.zip",
           "archive": "zip",
+          "size_bytes": 299000000,
+          "sha256": "TODO",
           "compute_units": "cpuAndNeuralEngine",
           "kind": "model"
         },
         {
           "name": "HyperSDVAEDecoder.mlpackage.zip",
+          "url": "https://github.com/john-rocky/CoreML-Models/releases/download/hypersd-v1/HyperSDVAEDecoder.mlpackage.zip",
           "archive": "zip",
+          "size_bytes": 95000000,
+          "sha256": "TODO",
+          "compute_units": "cpuAndNeuralEngine",
           "kind": "model"
         },
         {
           "name": "vocab.json",
+          "url": "https://github.com/john-rocky/CoreML-Models/releases/download/hypersd-v1/vocab.json",
+          "size_bytes": 1600000,
+          "sha256": "TODO",
           "kind": "vocab"
         },
         {
           "name": "merges.txt",
+          "url": "https://github.com/john-rocky/CoreML-Models/releases/download/hypersd-v1/merges.txt",
+          "size_bytes": 525000,
+          "sha256": "TODO",
           "kind": "vocab"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 1000 },
+      "license": { "name": "OpenRAIL-M", "url": "https://huggingface.co/ByteDance/Hyper-SD" },
+      "upstream": { "name": "ByteDance/Hyper-SD", "url": "https://huggingface.co/ByteDance/Hyper-SD", "year": 2024 }
     },
     {
       "id": "matanyone",
       "name": "MatAnyone",
       "subtitle": "Video Matting, 2025",
       "category_id": "video",
+      "description_md": "Temporally consistent video matting with memory propagation. 5-model pipeline: encoder, mask encoder, read first, read, decoder. 768×432 landscape input.",
       "demo": {
         "template": "video_matting",
         "config": {
           "frame_size": 512,
+          "encoder": "MatAnyone_Encoder.mlpackage.zip",
+          "mask_encoder": "MatAnyone_MaskEncoder.mlpackage.zip",
+          "read_first": "MatAnyone_ReadFirst.mlpackage.zip",
+          "read": "MatAnyone_Read.mlpackage.zip",
+          "decoder": "MatAnyone_Decoder.mlpackage.zip"
         }
       },
       "files": [
         {
+          "name": "MatAnyone_Encoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 20000000,
+          "sha256": "TODO",
+          "compute_units": "all",
           "kind": "model"
         },
         {
+          "name": "MatAnyone_MaskEncoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 10000000,
+          "sha256": "TODO",
+          "compute_units": "all",
           "kind": "model"
         },
         {
+          "name": "MatAnyone_ReadFirst.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 15000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
+          "name": "MatAnyone_Read.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 20000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
+          "name": "MatAnyone_Decoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 35000000,
+          "sha256": "TODO",
+          "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 800 },
+      "license": { "name": "MIT", "url": "https://github.com/pq-yang/MatAnyone" },
+      "upstream": { "name": "pq-yang/MatAnyone", "url": "https://github.com/pq-yang/MatAnyone", "year": 2025 }
     },
     {
       "id": "demucs",
       "name": "HTDemucs",
       "subtitle": "Audio Source Separation",
       "category_id": "audio",
+      "description_md": "Split music into 4 stems: drums, bass, vocals, other. 44.1 kHz stereo, overlap-add for full tracks. FP32 model.",
       "demo": {
         "template": "audio_in_out",
         "config": {
           "sample_rate": 44100,
           "segment_length": 343980,
+          "output_stems": ["drums", "bass", "vocals", "other"]
         }
       },
       "files": [
         {
           "name": "HTDemucs_SourceSeparation_F32.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 360000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 1000 },
+      "license": { "name": "MIT", "url": "https://github.com/adefossez/demucs" },
+      "upstream": { "name": "adefossez/demucs", "url": "https://github.com/adefossez/demucs", "year": 2021 }
     },
     {
       "id": "kokoro",
       "name": "Kokoro-82M",
       "subtitle": "Multilingual TTS",
       "category_id": "speech",
+      "description_md": "English + Japanese text-to-speech. 24 kHz mono. On-device G2P. StyleTTS2 + iSTFTNet vocoder. 10 voices, bucketed decoder (128/256/512).",
       "demo": {
         "template": "text_to_audio",
         "config": {
           "mode": "tts",
           "sample_rate": 24000,
           "vocab_file": "kokoro_vocab.json",
+          "voices": ["af_heart", "af_bella", "am_michael", "bf_emma", "bm_george"]
         }
       },
       "files": [
         {
           "name": "Kokoro_Predictor.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 75000000,
+          "sha256": "TODO",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "Kokoro_Decoder_128.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 238000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         },
         {
           "name": "Kokoro_Decoder_256.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 241000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         },
         {
           "name": "Kokoro_Decoder_512.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 246000000,
+          "sha256": "TODO",
           "compute_units": "all",
           "kind": "model"
         },
         {
           "name": "kokoro_vocab.json",
+          "url": "TODO",
+          "size_bytes": 5000,
+          "sha256": "TODO",
           "kind": "vocab"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 1000 },
+      "license": { "name": "Apache-2.0", "url": "https://huggingface.co/hexgrad/Kokoro-82M" },
+      "upstream": { "name": "hexgrad/Kokoro-82M", "url": "https://huggingface.co/hexgrad/Kokoro-82M", "year": 2024 }
     },
     {
       "id": "stable_audio",
       "name": "Stable Audio Open",
       "subtitle": "Text-to-Music, 2024",
       "category_id": "speech",
+      "description_md": "Text-to-music generation. Up to 11.9s stereo 44.1 kHz. Rectified flow DiT + T5 encoder + Oobleck VAE decoder.",
       "demo": {
         "template": "text_to_audio",
         "config": {
       "files": [
         {
           "name": "StableAudioT5Encoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 105000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "StableAudioNumberEmbedder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 400000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "StableAudioDiT.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 326000000,
+          "sha256": "TODO",
           "compute_units": "cpuOnly",
           "kind": "model"
         },
         {
           "name": "StableAudioVAEDecoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 149000000,
+          "sha256": "TODO",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "t5_vocab.json",
+          "url": "TODO",
+          "size_bytes": 800000,
+          "sha256": "TODO",
           "kind": "vocab"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 1200 },
+      "license": { "name": "custom", "url": "https://huggingface.co/stabilityai/stable-audio-open-small" },
+      "upstream": { "name": "stabilityai/stable-audio-open-small", "url": "https://huggingface.co/stabilityai/stable-audio-open-small", "year": 2024 }
+    },
+    {
+      "id": "basicpitch",
+      "name": "Basic Pitch",
+      "subtitle": "Spotify, Music Transcription",
+      "category_id": "audio",
+      "description_md": "Polyphonic music transcription: audio → MIDI notes. Tiny 17K-param model (272 KB). Windowed inference at 22.05 kHz.",
+      "demo": {
+        "template": "audio_to_score",
+        "config": {
+          "sample_rate": 22050,
+          "window_size": 43844,
+          "hop_size": 256,
+          "n_bins": 88,
+          "onset_threshold": 0.5,
+          "note_threshold": 0.5
+        }
       },
+      "files": [
+        {
+          "name": "nmp.mlpackage.zip",
+          "url": "TODO",
+          "archive": "zip",
+          "size_bytes": 272000,
+          "sha256": "TODO",
+          "compute_units": "all",
+          "kind": "model"
+        }
+      ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 200 },
+      "license": { "name": "Apache-2.0", "url": "https://github.com/spotify/basic-pitch" },
+      "upstream": { "name": "spotify/basic-pitch", "url": "https://github.com/spotify/basic-pitch", "year": 2022 }
+    },
+    {
+      "id": "diarization",
+      "name": "Pyannote Diarization",
+      "subtitle": "Speaker Identification",
+      "category_id": "audio",
+      "description_md": "Speaker diarization: who spoke when. 16 kHz mono input, 10s segments. Outputs per-frame speaker logits.",
+      "demo": {
+        "template": "audio_in_out",
+        "config": {
+          "sample_rate": 16000,
+          "output_stems": ["speaker_timeline"]
+        }
       },
+      "files": [
+        {
+          "name": "Pyannote_Segmentation3_0.mlpackage.zip",
+          "url": "TODO",
+          "archive": "zip",
+          "size_bytes": 25000000,
+          "sha256": "TODO",
+          "compute_units": "cpuAndGPU",
+          "kind": "model"
+        }
+      ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 200 },
+      "license": { "name": "MIT", "url": "https://github.com/pyannote/pyannote-audio" },
+      "upstream": { "name": "pyannote/pyannote-audio", "url": "https://github.com/pyannote/pyannote-audio", "year": 2021 }
     },
     {
       "id": "openvoice",
       "name": "OpenVoice V2",
       "subtitle": "Voice Cloning",
       "category_id": "audio",
+      "description_md": "Zero-shot voice conversion. Clone a speaker from ~10s reference audio. Speaker encoder + voice converter.",
       "demo": {
         "template": "audio_in_out",
         "config": {
           "sample_rate": 22050,
+          "output_stems": ["converted"]
         }
       },
       "files": [
         {
           "name": "OpenVoice_SpeakerEncoder.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 35000000,
+          "sha256": "TODO",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         },
         {
           "name": "OpenVoice_VoiceConverter.mlpackage.zip",
+          "url": "TODO",
           "archive": "zip",
+          "size_bytes": 100000000,
+          "sha256": "TODO",
           "compute_units": "cpuAndGPU",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 500 },
+      "license": { "name": "MIT", "url": "https://github.com/myshell-ai/OpenVoice" },
+      "upstream": { "name": "myshell-ai/OpenVoice", "url": "https://github.com/myshell-ai/OpenVoice", "year": 2023 }
+    },
+    {
+      "id": "realesrgan",
+      "name": "Real-ESRGAN 4x",
+      "subtitle": "Super Resolution, 2021",
+      "category_id": "enhancement",
+      "description_md": "Real-world blind super-resolution. 4× upscale from any input. Handles noise, blur, and JPEG artifacts. 512×512 input → 2048×2048 output.",
+      "demo": {
+        "template": "image_in_out",
+        "config": { "input_size": 512, "output_type": "image" }
       },
+      "files": [
+        {
+          "name": "RealESRGAN_x4.mlpackage.zip",
+          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/realesrgan/RealESRGAN_x4.mlpackage.zip",
+          "archive": "zip",
+          "size_bytes": 66857221,
+          "sha256": "6107dc417de87bf974e5b225a2632e2c78f2849265dc897981f482e922050ec9",
+          "compute_units": "all",
+          "kind": "model"
+        }
+      ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 500 },
+      "license": { "name": "BSD-3-Clause", "url": "https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE" },
+      "upstream": { "name": "xinntao/Real-ESRGAN", "url": "https://github.com/xinntao/Real-ESRGAN", "year": 2021 }
+    },
+    {
+      "id": "gfpgan",
+      "name": "GFPGAN",
+      "subtitle": "Face Restoration, 2021",
+      "category_id": "restoration",
+      "description_md": "Blind face restoration with generative facial prior. Restores degraded face photos to high quality. 512×512 input/output.",
+      "demo": {
+        "template": "image_in_out",
+        "config": { "input_size": 512, "output_type": "image" }
       },
+      "files": [
+        {
+          "name": "GFPGAN.mlpackage.zip",
+          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/gfpgan/GFPGAN.mlpackage.zip",
+          "archive": "zip",
+          "size_bytes": 337392296,
+          "sha256": "218a39c226adecb2ccbc1e358023b80a5cf2510be85dfc3ab0da698fad51391a",
+          "compute_units": "all",
+          "kind": "model"
+        }
+      ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 600 },
+      "license": { "name": "Apache-2.0", "url": "https://github.com/TencentARC/GFPGAN/blob/master/LICENSE" },
+      "upstream": { "name": "TencentARC/GFPGAN", "url": "https://github.com/TencentARC/GFPGAN", "year": 2021 }
     },
     {
+      "id": "rfdetr_n",
+      "name": "RF-DETR Nano",
+      "subtitle": "Object Detection, 2025",
+      "category_id": "detection",
+      "description_md": "End-to-end transformer detector. 384×384 input. 300 queries, 91 classes (COCO + background). No NMS needed. Output: confidence [300,91] + coordinates [300,4] in normalized cxcywh.",
       "demo": {
+        "template": "image_detection",
         "config": {
+          "input_size": 384,
+          "confidence_threshold": 0.5,
+          "output_format": "detr",
+          "num_classes": 91,
+          "background_class": 0
         }
       },
       "files": [
         {
+          "name": "rfdetr_n_coco.mlpackage.zip",
+          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/rfdetr/rfdetr_n_coco.mlpackage.zip",
           "archive": "zip",
+          "size_bytes": 99819094,
+          "sha256": "3cac3793b97aa88d5f79290afee24ba86e30da65e884933e3f8b0ba077ec48b4",
+          "compute_units": "all",
+          "kind": "model"
+        }
+      ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 400 },
+      "license": { "name": "Apache-2.0", "url": "https://github.com/roboflow/rf-detr/blob/main/LICENSE" },
+      "upstream": { "name": "roboflow/rf-detr", "url": "https://github.com/roboflow/rf-detr", "year": 2025 }
+    },
+    {
+      "id": "face_parsing",
+      "name": "Face Parsing",
+      "subtitle": "Facial Segmentation, 2019",
+      "category_id": "segmentation",
+      "description_md": "Semantic face parsing into 19 regions: skin, nose, eyes, eyebrows, ears, mouth, lip, hair, hat, eyeglass, earring, necklace, neck, cloth, background. 512×512 input.",
+      "demo": {
+        "template": "image_in_out",
+        "config": { "input_size": 512, "output_type": "segmap", "num_classes": 19 }
+      },
+      "files": [
+        {
+          "name": "FaceParsing.mlpackage.zip",
+          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/faceparsing/FaceParsing.mlpackage.zip",
+          "archive": "zip",
+          "size_bytes": 53182369,
+          "sha256": "e7ebd6cc3f53486becc0dbf3b74027bc045aa4158402936ea09c3625682be6bb",
+          "compute_units": "all",
           "kind": "model"
         }
       ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 300 },
+      "license": { "name": "MIT", "url": "https://github.com/zllrunning/face-parsing.PyTorch/blob/master/LICENSE" },
+      "upstream": { "name": "zllrunning/face-parsing.PyTorch", "url": "https://github.com/zllrunning/face-parsing.PyTorch", "year": 2019 }
+    },
+    {
+      "id": "mobilesam",
+      "name": "MobileSAM",
+      "subtitle": "Segment Anything, 2023",
+      "category_id": "segmentation",
+      "description_md": "Lightweight Segment Anything. Tap any point to generate a segmentation mask. ViT-Tiny encoder (13 MB) + lightweight decoder (9.8 MB). ~60× smaller than SAM.",
+      "demo": {
+        "template": "segment_anything",
+        "config": {
+          "encoder": "MobileSAM_Encoder.mlpackage.zip",
+          "decoder": "MobileSAM_Decoder.mlpackage.zip",
+          "input_size": 1024
+        }
       },
+      "files": [
+        {
+          "name": "MobileSAM.zip",
+          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/mobilesam/MobileSAM.zip",
+          "archive": "zip",
+          "size_bytes": 20143994,
+          "sha256": "0d8d48cb90a48cd860cc3105f54fdeca2a3cb75876a7c936e7243221e3f24681",
+          "compute_units": "all",
+          "kind": "model"
+        }
+      ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 300 },
+      "license": { "name": "Apache-2.0", "url": "https://github.com/ChaoningZhang/MobileSAM/blob/master/LICENSE" },
+      "upstream": { "name": "ChaoningZhang/MobileSAM", "url": "https://github.com/ChaoningZhang/MobileSAM", "year": 2023 }
+    },
+    {
+      "id": "lama",
+      "name": "LaMa",
+      "subtitle": "Image Inpainting, 2022",
+      "category_id": "inpainting",
+      "description_md": "Resolution-robust large mask inpainting. Draw over unwanted objects to remove them. Fast Fourier convolutions for global context. 800×800 input.",
+      "demo": {
+        "template": "inpainting",
+        "config": { "input_size": 800 }
       },
+      "files": [
+        {
+          "name": "LaMa.mlpackage.zip",
+          "url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/lama/LaMa.mlpackage.zip",
+          "archive": "zip",
+          "size_bytes": 196237256,
+          "sha256": "b57b8451a1a86c00aea52d75230fb5f49d3076eec67403192758c9d2b59c0e69",
+          "compute_units": "all",
+          "kind": "model"
+        }
+      ],
+      "requirements": { "min_ios": "17.0", "min_ram_mb": 600 },
+      "license": { "name": "Apache-2.0", "url": "https://github.com/advimman/lama/blob/main/LICENSE" },
+      "upstream": { "name": "advimman/lama", "url": "https://github.com/advimman/lama", "year": 2022 }
     }
   ]
+}