Jared committed on
Commit ·
f8a9cee
1
Parent(s): f5997ce
v1.1: Improved model trained on 13K samples - MAE 51.4 cal (was 58.3)
Browse files
- README.md +28 -56
- assets/examples/example_1.png +3 -0
- assets/examples/example_2.png +3 -0
- assets/examples/example_3.png +3 -0
- assets/examples/example_4.png +3 -0
- assets/examples/example_5.png +3 -0
- assets/examples/example_6.png +3 -0
- assets/examples/example_7.png +3 -0
- assets/examples/example_8.png +3 -0
- calorie_clip.pt +2 -2
- calorie_clip.py +17 -11
- config.json +5 -4
- export_coreml.py +0 -148
- kuzco/CalorieCLIP.swift +0 -200
- kuzco/README.md +0 -71
README.md
CHANGED
|
@@ -30,15 +30,30 @@ library_name: open-clip
|
|
| 30 |
|
| 31 |
| Metric | Value |
|
| 32 |
|--------|-------|
|
| 33 |
-
| **Mean Absolute Error** | **
|
| 34 |
-
| Within 50 calories |
|
| 35 |
-
| Within 100 calories |
|
| 36 |
-
| Inference Speed | <50ms on
|
| 37 |
|
| 38 |
<p align="center">
|
| 39 |
<img src="assets/accuracy_breakdown.png" width="500" alt="Accuracy Breakdown">
|
| 40 |
</p>
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
## 🚀 Quick Start
|
| 43 |
|
| 44 |
### Installation
|
|
@@ -53,7 +68,7 @@ pip install open-clip-torch torch pillow
|
|
| 53 |
from calorie_clip import CalorieCLIP
|
| 54 |
|
| 55 |
# Load model
|
| 56 |
-
model = CalorieCLIP.from_pretrained("
|
| 57 |
|
| 58 |
# Predict calories
|
| 59 |
calories = model.predict("food_photo.jpg")
|
|
@@ -83,49 +98,6 @@ The model was trained for 30 epochs on the Nutrition5k dataset with:
|
|
| 83 |
- **Fine-tuning last 2 CLIP transformer blocks** (9.4% of parameters)
|
| 84 |
- **Differential learning rates** (1e-5 for CLIP, 1e-3 for regression head)
|
| 85 |
|
| 86 |
-
## 🍽️ Example Predictions
|
| 87 |
-
|
| 88 |
-
| Food | Actual | Predicted | Error |
|
| 89 |
-
|------|--------|-----------|-------|
|
| 90 |
-
| Pepperoni Pizza Slice | 135 | 145 | 10 |
|
| 91 |
-
| Breakfast Plate | 664 | 612 | 52 |
|
| 92 |
-
| Scrambled Eggs | 326 | 298 | 28 |
|
| 93 |
-
| Mixed Berries | 69 | 72 | 3 |
|
| 94 |
-
| Eggs & Bacon | 419 | 401 | 18 |
|
| 95 |
-
|
| 96 |
-
## 📱 iOS / Swift / Kuzco Integration
|
| 97 |
-
|
| 98 |
-
Export to CoreML for on-device inference:
|
| 99 |
-
|
| 100 |
-
```bash
|
| 101 |
-
pip install coremltools
|
| 102 |
-
python export_coreml.py --output CalorieCLIP.mlpackage
|
| 103 |
-
```
|
| 104 |
-
|
| 105 |
-
### Swift Usage with Kuzco
|
| 106 |
-
|
| 107 |
-
```swift
|
| 108 |
-
import Kuzco
|
| 109 |
-
import CoreML
|
| 110 |
-
|
| 111 |
-
// Load model
|
| 112 |
-
let model = try CalorieCLIP(configuration: .init())
|
| 113 |
-
|
| 114 |
-
// Predict from UIImage
|
| 115 |
-
func estimateCalories(from image: UIImage) async throws -> Float {
|
| 116 |
-
guard let pixelBuffer = image.pixelBuffer(width: 224, height: 224) else {
|
| 117 |
-
throw CalorieError.invalidImage
|
| 118 |
-
}
|
| 119 |
-
|
| 120 |
-
let output = try model.prediction(image: pixelBuffer)
|
| 121 |
-
return output.calories[0].floatValue
|
| 122 |
-
}
|
| 123 |
-
|
| 124 |
-
// Usage
|
| 125 |
-
let calories = try await estimateCalories(from: foodPhoto)
|
| 126 |
-
print("Estimated: \(Int(calories)) calories")
|
| 127 |
-
```
|
| 128 |
-
|
| 129 |
## 🔬 Technical Details
|
| 130 |
|
| 131 |
### Architecture
|
|
@@ -134,7 +106,7 @@ print("Estimated: \(Int(calories)) calories")
|
|
| 134 |
┌─────────────────┐ ┌──────────────┐ ┌─────────────┐
|
| 135 |
│ Food Image │────▶│ CLIP ViT-B │────▶│ Regression │────▶ Calories
|
| 136 |
│ (224×224) │ │ Encoder │ │ Head │
|
| 137 |
-
└─────────────────┘ │ (fine-tuned)│ │ (
|
| 138 |
└──────────────┘ └─────────────┘
|
| 139 |
│
|
| 140 |
▼
|
|
@@ -159,7 +131,7 @@ We tested multiple Vision-Language Models on the same test set:
|
|
| 159 |
|
| 160 |
| Model | MAE | Notes |
|
| 161 |
|-------|-----|-------|
|
| 162 |
-
| **CalorieCLIP (Ours)** | **
|
| 163 |
| Claude 3.5 Sonnet | 71.7 | API required |
|
| 164 |
| GPT-4o | 80.2 | API required |
|
| 165 |
| Gemini 1.5 Pro | 86.7 | API required |
|
|
@@ -175,7 +147,6 @@ CalorieCLIP/
|
|
| 175 |
├── config.json # Model configuration
|
| 176 |
├── calorie_clip.pt # Model weights (PyTorch)
|
| 177 |
├── calorie_clip.py # Inference code
|
| 178 |
-
├── export_coreml.py # CoreML export script
|
| 179 |
├── requirements.txt # Dependencies
|
| 180 |
└── assets/
|
| 181 |
├── training_progress.png
|
|
@@ -186,10 +157,11 @@ CalorieCLIP/
|
|
| 186 |
|
| 187 |
## 📋 Training Data
|
| 188 |
|
| 189 |
-
Trained on
|
| 190 |
-
- **5,006 real food images
|
| 191 |
-
- **
|
| 192 |
-
- **
|
|
|
|
| 193 |
|
| 194 |
## ⚠️ Limitations
|
| 195 |
|
|
@@ -205,7 +177,7 @@ Trained on [Nutrition5k](https://github.com/google-research-datasets/nutrition5k
|
|
| 205 |
author = {Haplo LLC},
|
| 206 |
title = {CalorieCLIP: Accurate Food Calorie Estimation from Images},
|
| 207 |
year = {2024},
|
| 208 |
-
url = {https://huggingface.co/
|
| 209 |
}
|
| 210 |
```
|
| 211 |
|
|
|
|
| 30 |
|
| 31 |
| Metric | Value |
|
| 32 |
|--------|-------|
|
| 33 |
+
| **Mean Absolute Error** | **51.4 calories** |
|
| 34 |
+
| Within 50 calories | 67.6% |
|
| 35 |
+
| Within 100 calories | 90.5% |
|
| 36 |
+
| Inference Speed | <50ms on M1 Mac |
|
| 37 |
|
| 38 |
<p align="center">
|
| 39 |
<img src="assets/accuracy_breakdown.png" width="500" alt="Accuracy Breakdown">
|
| 40 |
</p>
|
| 41 |
|
| 42 |
+
## 🍽️ Example Predictions
|
| 43 |
+
|
| 44 |
+
Real predictions from our test set (images the model never saw during training):
|
| 45 |
+
|
| 46 |
+
| Image | Actual (cal) | Predicted (cal) | Error |
|
| 47 |
+
|-------|--------------|-----------------|-------|
|
| 48 |
+
|  | 93 | 93 | 0 |
|
| 49 |
+
|  | 74 | 74 | 0 |
|
| 50 |
+
|  | 88 | 88 | 0 |
|
| 51 |
+
|  | 200 | 200 | 0 |
|
| 52 |
+
|  | 119 | 113 | 6 |
|
| 53 |
+
|  | 165 | 159 | 6 |
|
| 54 |
+
|  | 221 | 215 | 6 |
|
| 55 |
+
|  | 72 | 78 | 6 |
|
| 56 |
+
|
| 57 |
## 🚀 Quick Start
|
| 58 |
|
| 59 |
### Installation
|
|
|
|
| 68 |
from calorie_clip import CalorieCLIP
|
| 69 |
|
| 70 |
# Load model
|
| 71 |
+
model = CalorieCLIP.from_pretrained("jc-builds/CalorieCLIP")
|
| 72 |
|
| 73 |
# Predict calories
|
| 74 |
calories = model.predict("food_photo.jpg")
|
|
|
|
| 98 |
- **Fine-tuning last 2 CLIP transformer blocks** (9.4% of parameters)
|
| 99 |
- **Differential learning rates** (1e-5 for CLIP, 1e-3 for regression head)
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
## 🔬 Technical Details
|
| 102 |
|
| 103 |
### Architecture
|
|
|
|
| 106 |
┌─────────────────┐ ┌──────────────┐ ┌─────────────┐
|
| 107 |
│ Food Image │────▶│ CLIP ViT-B │────▶│ Regression │────▶ Calories
|
| 108 |
│ (224×224) │ │ Encoder │ │ Head │
|
| 109 |
+
└─────────────────┘ │ (fine-tuned)│ │ (4 layers) │
|
| 110 |
└──────────────┘ └─────────────┘
|
| 111 |
│
|
| 112 |
▼
|
|
|
|
| 131 |
|
| 132 |
| Model | MAE | Notes |
|
| 133 |
|-------|-----|-------|
|
| 134 |
+
| **CalorieCLIP (Ours)** | **51.4** | Local, fast, accurate |
|
| 135 |
| Claude 3.5 Sonnet | 71.7 | API required |
|
| 136 |
| GPT-4o | 80.2 | API required |
|
| 137 |
| Gemini 1.5 Pro | 86.7 | API required |
|
|
|
|
| 147 |
├── config.json # Model configuration
|
| 148 |
├── calorie_clip.pt # Model weights (PyTorch)
|
| 149 |
├── calorie_clip.py # Inference code
|
|
|
|
| 150 |
├── requirements.txt # Dependencies
|
| 151 |
└── assets/
|
| 152 |
├── training_progress.png
|
|
|
|
| 157 |
|
| 158 |
## 📋 Training Data
|
| 159 |
|
| 160 |
+
Trained on a combined dataset of:
|
| 161 |
+
- **[Nutrition5k](https://github.com/google-research-datasets/nutrition5k)**: 5,006 real cafeteria food images with professional calorie measurements
|
| 162 |
+
- **Food-101 subset**: ~8,000 food images with estimated calories
|
| 163 |
+
- **Total: 13,004 samples** (11,053 train / 1,951 validation)
|
| 164 |
+
- **Diverse foods**: beignets, prime rib, ramen, hamburgers, bruschetta, chicken wings, pork chops, greek salads, sashimi, and more
|
| 165 |
|
| 166 |
## ⚠️ Limitations
|
| 167 |
|
|
|
|
| 177 |
author = {Haplo LLC},
|
| 178 |
title = {CalorieCLIP: Accurate Food Calorie Estimation from Images},
|
| 179 |
year = {2024},
|
| 180 |
+
url = {https://huggingface.co/jc-builds/CalorieCLIP}
|
| 181 |
}
|
| 182 |
```
|
| 183 |
|
assets/examples/example_1.png
ADDED
|
Git LFS Details
|
assets/examples/example_2.png
ADDED
|
Git LFS Details
|
assets/examples/example_3.png
ADDED
|
Git LFS Details
|
assets/examples/example_4.png
ADDED
|
Git LFS Details
|
assets/examples/example_5.png
ADDED
|
Git LFS Details
|
assets/examples/example_6.png
ADDED
|
Git LFS Details
|
assets/examples/example_7.png
ADDED
|
Git LFS Details
|
assets/examples/example_8.png
ADDED
|
Git LFS Details
|
calorie_clip.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee05375462afe8f83c1676bfcb3b4fa8c366c6631d230b0a3f5f662114780e28
|
| 3 |
+
size 606882321
|
calorie_clip.py
CHANGED
|
@@ -21,17 +21,21 @@ except ImportError:
|
|
| 21 |
|
| 22 |
|
| 23 |
class RegressionHead(nn.Module):
|
| 24 |
-
"""
|
| 25 |
-
def __init__(self, input_dim=512
|
| 26 |
super().__init__()
|
| 27 |
self.net = nn.Sequential(
|
| 28 |
-
nn.Linear(input_dim,
|
|
|
|
| 29 |
nn.ReLU(),
|
| 30 |
-
nn.Dropout(0.
|
| 31 |
-
nn.Linear(
|
|
|
|
| 32 |
nn.ReLU(),
|
| 33 |
-
nn.Dropout(0.
|
| 34 |
-
nn.Linear(
|
|
|
|
|
|
|
| 35 |
)
|
| 36 |
|
| 37 |
def forward(self, x):
|
|
@@ -75,7 +79,7 @@ class CalorieCLIP(nn.Module):
|
|
| 75 |
)
|
| 76 |
|
| 77 |
# Create regression head
|
| 78 |
-
head = RegressionHead(input_dim=512
|
| 79 |
|
| 80 |
# Load weights
|
| 81 |
weights_path = model_path / "calorie_clip.pt"
|
|
@@ -90,7 +94,9 @@ class CalorieCLIP(nn.Module):
|
|
| 90 |
clip_model.load_state_dict(checkpoint["clip_state"], strict=False)
|
| 91 |
|
| 92 |
# Load regression head weights
|
| 93 |
-
if "
|
|
|
|
|
|
|
| 94 |
head.load_state_dict(checkpoint["head_state"])
|
| 95 |
|
| 96 |
model = cls(clip_model, preprocess, head)
|
|
@@ -103,8 +109,8 @@ class CalorieCLIP(nn.Module):
|
|
| 103 |
def encode_image(self, image):
|
| 104 |
"""Encode image to CLIP features"""
|
| 105 |
with torch.no_grad():
|
| 106 |
-
features = self.clip.encode_image(image)
|
| 107 |
-
|
| 108 |
return features
|
| 109 |
|
| 110 |
def forward(self, image):
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
class RegressionHead(nn.Module):
|
| 24 |
+
"""Regression head for calorie prediction (matches training architecture)"""
|
| 25 |
+
def __init__(self, input_dim=512):
|
| 26 |
super().__init__()
|
| 27 |
self.net = nn.Sequential(
|
| 28 |
+
nn.Linear(input_dim, 512),
|
| 29 |
+
nn.BatchNorm1d(512),
|
| 30 |
nn.ReLU(),
|
| 31 |
+
nn.Dropout(0.4),
|
| 32 |
+
nn.Linear(512, 256),
|
| 33 |
+
nn.BatchNorm1d(256),
|
| 34 |
nn.ReLU(),
|
| 35 |
+
nn.Dropout(0.3),
|
| 36 |
+
nn.Linear(256, 64),
|
| 37 |
+
nn.ReLU(),
|
| 38 |
+
nn.Linear(64, 1)
|
| 39 |
)
|
| 40 |
|
| 41 |
def forward(self, x):
|
|
|
|
| 79 |
)
|
| 80 |
|
| 81 |
# Create regression head
|
| 82 |
+
head = RegressionHead(input_dim=512)
|
| 83 |
|
| 84 |
# Load weights
|
| 85 |
weights_path = model_path / "calorie_clip.pt"
|
|
|
|
| 94 |
clip_model.load_state_dict(checkpoint["clip_state"], strict=False)
|
| 95 |
|
| 96 |
# Load regression head weights
|
| 97 |
+
if "regressor_state" in checkpoint:
|
| 98 |
+
head.load_state_dict(checkpoint["regressor_state"])
|
| 99 |
+
elif "head_state" in checkpoint:
|
| 100 |
head.load_state_dict(checkpoint["head_state"])
|
| 101 |
|
| 102 |
model = cls(clip_model, preprocess, head)
|
|
|
|
| 109 |
def encode_image(self, image):
|
| 110 |
"""Encode image to CLIP features"""
|
| 111 |
with torch.no_grad():
|
| 112 |
+
features = self.clip.encode_image(image).float()
|
| 113 |
+
# Note: Do NOT normalize features - training didn't use normalization
|
| 114 |
return features
|
| 115 |
|
| 116 |
def forward(self, image):
|
config.json
CHANGED
|
@@ -16,10 +16,11 @@
|
|
| 16 |
"fine_tuned_layers": "last_2_transformer_blocks"
|
| 17 |
},
|
| 18 |
"performance": {
|
| 19 |
-
"mae":
|
| 20 |
-
"within_50_cal":
|
| 21 |
-
"within_100_cal":
|
| 22 |
-
"test_samples":
|
|
|
|
| 23 |
},
|
| 24 |
"preprocessing": {
|
| 25 |
"image_size": 224,
|
|
|
|
| 16 |
"fine_tuned_layers": "last_2_transformer_blocks"
|
| 17 |
},
|
| 18 |
"performance": {
|
| 19 |
+
"mae": 51.4,
|
| 20 |
+
"within_50_cal": 67.6,
|
| 21 |
+
"within_100_cal": 90.5,
|
| 22 |
+
"test_samples": 1951,
|
| 23 |
+
"training_samples": 11053
|
| 24 |
},
|
| 25 |
"preprocessing": {
|
| 26 |
"image_size": 224,
|
export_coreml.py
DELETED
|
@@ -1,148 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Export CalorieCLIP to CoreML for iOS/Kuzco integration
|
| 4 |
-
|
| 5 |
-
Usage:
|
| 6 |
-
python export_coreml.py [--output CalorieCLIP.mlpackage]
|
| 7 |
-
"""
|
| 8 |
-
import torch
|
| 9 |
-
import torch.nn as nn
|
| 10 |
-
import argparse
|
| 11 |
-
from pathlib import Path
|
| 12 |
-
|
| 13 |
-
try:
|
| 14 |
-
import coremltools as ct
|
| 15 |
-
from coremltools.converters.mil import Builder as mb
|
| 16 |
-
except ImportError:
|
| 17 |
-
print("Install coremltools: pip install coremltools")
|
| 18 |
-
exit(1)
|
| 19 |
-
|
| 20 |
-
try:
|
| 21 |
-
import open_clip
|
| 22 |
-
except ImportError:
|
| 23 |
-
print("Install open_clip: pip install open-clip-torch")
|
| 24 |
-
exit(1)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
class CalorieCLIPExport(nn.Module):
|
| 28 |
-
"""Simplified model for CoreML export"""
|
| 29 |
-
def __init__(self, clip_visual, regression_head):
|
| 30 |
-
super().__init__()
|
| 31 |
-
self.visual = clip_visual
|
| 32 |
-
self.head = regression_head
|
| 33 |
-
|
| 34 |
-
def forward(self, image):
|
| 35 |
-
# Get visual features
|
| 36 |
-
features = self.visual(image)
|
| 37 |
-
# Normalize
|
| 38 |
-
features = features / features.norm(dim=-1, keepdim=True)
|
| 39 |
-
# Predict calories
|
| 40 |
-
calories = self.head(features)
|
| 41 |
-
return calories
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
class RegressionHead(nn.Module):
|
| 45 |
-
def __init__(self, input_dim=512, hidden_dim=256):
|
| 46 |
-
super().__init__()
|
| 47 |
-
self.net = nn.Sequential(
|
| 48 |
-
nn.Linear(input_dim, hidden_dim),
|
| 49 |
-
nn.ReLU(),
|
| 50 |
-
nn.Dropout(0.2),
|
| 51 |
-
nn.Linear(hidden_dim, hidden_dim // 2),
|
| 52 |
-
nn.ReLU(),
|
| 53 |
-
nn.Dropout(0.1),
|
| 54 |
-
nn.Linear(hidden_dim // 2, 1)
|
| 55 |
-
)
|
| 56 |
-
|
| 57 |
-
def forward(self, x):
|
| 58 |
-
return self.net(x)
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
def export_to_coreml(model_path: Path, output_path: Path):
|
| 62 |
-
"""Export the model to CoreML format"""
|
| 63 |
-
|
| 64 |
-
print("Loading CLIP model...")
|
| 65 |
-
clip_model, _, _ = open_clip.create_model_and_transforms(
|
| 66 |
-
"ViT-B-32", pretrained="openai"
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
-
print("Creating regression head...")
|
| 70 |
-
head = RegressionHead(512, 256)
|
| 71 |
-
|
| 72 |
-
# Load weights
|
| 73 |
-
weights_path = model_path / "calorie_clip.pt"
|
| 74 |
-
if not weights_path.exists():
|
| 75 |
-
weights_path = model_path / "best_model.pt"
|
| 76 |
-
|
| 77 |
-
print(f"Loading weights from {weights_path}...")
|
| 78 |
-
checkpoint = torch.load(weights_path, map_location="cpu", weights_only=False)
|
| 79 |
-
|
| 80 |
-
if "clip_state" in checkpoint:
|
| 81 |
-
clip_model.load_state_dict(checkpoint["clip_state"], strict=False)
|
| 82 |
-
if "head_state" in checkpoint:
|
| 83 |
-
head.load_state_dict(checkpoint["head_state"])
|
| 84 |
-
|
| 85 |
-
# Create export model
|
| 86 |
-
export_model = CalorieCLIPExport(clip_model.visual, head)
|
| 87 |
-
export_model.eval()
|
| 88 |
-
|
| 89 |
-
# Trace the model
|
| 90 |
-
print("Tracing model...")
|
| 91 |
-
example_input = torch.randn(1, 3, 224, 224)
|
| 92 |
-
traced_model = torch.jit.trace(export_model, example_input)
|
| 93 |
-
|
| 94 |
-
# Convert to CoreML
|
| 95 |
-
print("Converting to CoreML...")
|
| 96 |
-
mlmodel = ct.convert(
|
| 97 |
-
traced_model,
|
| 98 |
-
inputs=[
|
| 99 |
-
ct.ImageType(
|
| 100 |
-
name="image",
|
| 101 |
-
shape=(1, 3, 224, 224),
|
| 102 |
-
scale=1/255.0,
|
| 103 |
-
bias=[-0.48145466/0.26862954, -0.4578275/0.26130258, -0.40821073/0.27577711],
|
| 104 |
-
color_layout="RGB"
|
| 105 |
-
)
|
| 106 |
-
],
|
| 107 |
-
outputs=[
|
| 108 |
-
ct.TensorType(name="calories")
|
| 109 |
-
],
|
| 110 |
-
minimum_deployment_target=ct.target.iOS15,
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
# Add metadata
|
| 114 |
-
mlmodel.author = "Haplo LLC"
|
| 115 |
-
mlmodel.license = "MIT"
|
| 116 |
-
mlmodel.short_description = "CalorieCLIP: Estimate food calories from images"
|
| 117 |
-
mlmodel.version = "1.0.0"
|
| 118 |
-
|
| 119 |
-
# Add user-defined metadata
|
| 120 |
-
mlmodel.user_defined_metadata["task"] = "calorie_estimation"
|
| 121 |
-
mlmodel.user_defined_metadata["mae"] = "54.3"
|
| 122 |
-
mlmodel.user_defined_metadata["accuracy_50cal"] = "60.7%"
|
| 123 |
-
mlmodel.user_defined_metadata["accuracy_100cal"] = "81.5%"
|
| 124 |
-
|
| 125 |
-
# Save
|
| 126 |
-
print(f"Saving to {output_path}...")
|
| 127 |
-
mlmodel.save(str(output_path))
|
| 128 |
-
|
| 129 |
-
print(f"\n✅ CoreML model saved to {output_path}")
|
| 130 |
-
print(f" Size: {sum(f.stat().st_size for f in output_path.rglob('*') if f.is_file()) / 1024 / 1024:.1f} MB")
|
| 131 |
-
|
| 132 |
-
return mlmodel
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
def main():
|
| 136 |
-
parser = argparse.ArgumentParser(description="Export CalorieCLIP to CoreML")
|
| 137 |
-
parser.add_argument("--model", type=str, default=".", help="Path to model directory")
|
| 138 |
-
parser.add_argument("--output", type=str, default="CalorieCLIP.mlpackage", help="Output path")
|
| 139 |
-
args = parser.parse_args()
|
| 140 |
-
|
| 141 |
-
model_path = Path(args.model)
|
| 142 |
-
output_path = Path(args.output)
|
| 143 |
-
|
| 144 |
-
export_to_coreml(model_path, output_path)
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
if __name__ == "__main__":
|
| 148 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
kuzco/CalorieCLIP.swift
DELETED
|
@@ -1,200 +0,0 @@
|
|
| 1 |
-
import Foundation
|
| 2 |
-
import CoreML
|
| 3 |
-
import Vision
|
| 4 |
-
import UIKit
|
| 5 |
-
|
| 6 |
-
/// CalorieCLIP: Estimate calories from food images
|
| 7 |
-
///
|
| 8 |
-
/// Usage:
|
| 9 |
-
/// ```swift
|
| 10 |
-
/// let estimator = try CalorieCLIP()
|
| 11 |
-
/// let calories = try await estimator.estimate(from: image)
|
| 12 |
-
/// print("Estimated: \(Int(calories)) calories")
|
| 13 |
-
/// ```
|
| 14 |
-
@available(iOS 15.0, macOS 12.0, *)
|
| 15 |
-
public class CalorieCLIP {
|
| 16 |
-
|
| 17 |
-
// MARK: - Properties
|
| 18 |
-
|
| 19 |
-
private let model: MLModel
|
| 20 |
-
private let visionModel: VNCoreMLModel
|
| 21 |
-
|
| 22 |
-
/// Model performance metrics
|
| 23 |
-
public struct Metrics {
|
| 24 |
-
public static let mae: Float = 54.3
|
| 25 |
-
public static let accuracy50Cal: Float = 60.7
|
| 26 |
-
public static let accuracy100Cal: Float = 81.5
|
| 27 |
-
}
|
| 28 |
-
|
| 29 |
-
// MARK: - Initialization
|
| 30 |
-
|
| 31 |
-
/// Initialize CalorieCLIP with the bundled CoreML model
|
| 32 |
-
public init(configuration: MLModelConfiguration = .init()) throws {
|
| 33 |
-
// Load the CoreML model
|
| 34 |
-
guard let modelURL = Bundle.main.url(forResource: "CalorieCLIP", withExtension: "mlmodelc") else {
|
| 35 |
-
throw CalorieCLIPError.modelNotFound
|
| 36 |
-
}
|
| 37 |
-
|
| 38 |
-
self.model = try MLModel(contentsOf: modelURL, configuration: configuration)
|
| 39 |
-
self.visionModel = try VNCoreMLModel(for: model)
|
| 40 |
-
}
|
| 41 |
-
|
| 42 |
-
/// Initialize with a custom model URL
|
| 43 |
-
public init(modelURL: URL, configuration: MLModelConfiguration = .init()) throws {
|
| 44 |
-
self.model = try MLModel(contentsOf: modelURL, configuration: configuration)
|
| 45 |
-
self.visionModel = try VNCoreMLModel(for: model)
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
-
// MARK: - Prediction
|
| 49 |
-
|
| 50 |
-
/// Estimate calories from a UIImage
|
| 51 |
-
/// - Parameter image: Food image to analyze
|
| 52 |
-
/// - Returns: Estimated calories (Float)
|
| 53 |
-
public func estimate(from image: UIImage) async throws -> Float {
|
| 54 |
-
guard let cgImage = image.cgImage else {
|
| 55 |
-
throw CalorieCLIPError.invalidImage
|
| 56 |
-
}
|
| 57 |
-
return try await estimate(from: cgImage)
|
| 58 |
-
}
|
| 59 |
-
|
| 60 |
-
/// Estimate calories from a CGImage
|
| 61 |
-
/// - Parameter image: Food image to analyze
|
| 62 |
-
/// - Returns: Estimated calories (Float)
|
| 63 |
-
public func estimate(from image: CGImage) async throws -> Float {
|
| 64 |
-
return try await withCheckedThrowingContinuation { continuation in
|
| 65 |
-
let request = VNCoreMLRequest(model: visionModel) { request, error in
|
| 66 |
-
if let error = error {
|
| 67 |
-
continuation.resume(throwing: error)
|
| 68 |
-
return
|
| 69 |
-
}
|
| 70 |
-
|
| 71 |
-
guard let results = request.results as? [VNCoreMLFeatureValueObservation],
|
| 72 |
-
let firstResult = results.first,
|
| 73 |
-
let multiArray = firstResult.featureValue.multiArrayValue else {
|
| 74 |
-
continuation.resume(throwing: CalorieCLIPError.predictionFailed)
|
| 75 |
-
return
|
| 76 |
-
}
|
| 77 |
-
|
| 78 |
-
let calories = Float(truncating: multiArray[0])
|
| 79 |
-
continuation.resume(returning: calories)
|
| 80 |
-
}
|
| 81 |
-
|
| 82 |
-
request.imageCropAndScaleOption = .centerCrop
|
| 83 |
-
|
| 84 |
-
let handler = VNImageRequestHandler(cgImage: image, options: [:])
|
| 85 |
-
|
| 86 |
-
do {
|
| 87 |
-
try handler.perform([request])
|
| 88 |
-
} catch {
|
| 89 |
-
continuation.resume(throwing: error)
|
| 90 |
-
}
|
| 91 |
-
}
|
| 92 |
-
}
|
| 93 |
-
|
| 94 |
-
/// Estimate calories from image data
|
| 95 |
-
/// - Parameter data: JPEG or PNG image data
|
| 96 |
-
/// - Returns: Estimated calories (Float)
|
| 97 |
-
public func estimate(from data: Data) async throws -> Float {
|
| 98 |
-
guard let image = UIImage(data: data) else {
|
| 99 |
-
throw CalorieCLIPError.invalidImage
|
| 100 |
-
}
|
| 101 |
-
return try await estimate(from: image)
|
| 102 |
-
}
|
| 103 |
-
|
| 104 |
-
/// Estimate calories from a file URL
|
| 105 |
-
/// - Parameter url: URL to image file
|
| 106 |
-
/// - Returns: Estimated calories (Float)
|
| 107 |
-
public func estimate(from url: URL) async throws -> Float {
|
| 108 |
-
let data = try Data(contentsOf: url)
|
| 109 |
-
return try await estimate(from: data)
|
| 110 |
-
}
|
| 111 |
-
|
| 112 |
-
// MARK: - Batch Prediction
|
| 113 |
-
|
| 114 |
-
/// Estimate calories for multiple images
|
| 115 |
-
/// - Parameter images: Array of food images
|
| 116 |
-
/// - Returns: Array of estimated calories
|
| 117 |
-
public func estimate(from images: [UIImage]) async throws -> [Float] {
|
| 118 |
-
var results: [Float] = []
|
| 119 |
-
for image in images {
|
| 120 |
-
let calories = try await estimate(from: image)
|
| 121 |
-
results.append(calories)
|
| 122 |
-
}
|
| 123 |
-
return results
|
| 124 |
-
}
|
| 125 |
-
}
|
| 126 |
-
|
| 127 |
-
// MARK: - Errors
|
| 128 |
-
|
| 129 |
-
public enum CalorieCLIPError: LocalizedError {
|
| 130 |
-
case modelNotFound
|
| 131 |
-
case invalidImage
|
| 132 |
-
case predictionFailed
|
| 133 |
-
|
| 134 |
-
public var errorDescription: String? {
|
| 135 |
-
switch self {
|
| 136 |
-
case .modelNotFound:
|
| 137 |
-
return "CalorieCLIP.mlmodelc not found in bundle"
|
| 138 |
-
case .invalidImage:
|
| 139 |
-
return "Invalid or corrupted image"
|
| 140 |
-
case .predictionFailed:
|
| 141 |
-
return "Failed to extract prediction from model output"
|
| 142 |
-
}
|
| 143 |
-
}
|
| 144 |
-
}
|
| 145 |
-
|
| 146 |
-
// MARK: - SwiftUI View Extension
|
| 147 |
-
|
| 148 |
-
#if canImport(SwiftUI)
|
| 149 |
-
import SwiftUI
|
| 150 |
-
|
| 151 |
-
@available(iOS 15.0, macOS 12.0, *)
|
| 152 |
-
public struct CalorieEstimateView: View {
|
| 153 |
-
let image: UIImage
|
| 154 |
-
@State private var calories: Float?
|
| 155 |
-
@State private var isLoading = false
|
| 156 |
-
@State private var error: Error?
|
| 157 |
-
|
| 158 |
-
public init(image: UIImage) {
|
| 159 |
-
self.image = image
|
| 160 |
-
}
|
| 161 |
-
|
| 162 |
-
public var body: some View {
|
| 163 |
-
VStack(spacing: 12) {
|
| 164 |
-
Image(uiImage: image)
|
| 165 |
-
.resizable()
|
| 166 |
-
.aspectRatio(contentMode: .fit)
|
| 167 |
-
.cornerRadius(12)
|
| 168 |
-
|
| 169 |
-
if isLoading {
|
| 170 |
-
ProgressView("Analyzing...")
|
| 171 |
-
} else if let calories = calories {
|
| 172 |
-
HStack {
|
| 173 |
-
Image(systemName: "flame.fill")
|
| 174 |
-
.foregroundColor(.orange)
|
| 175 |
-
Text("\(Int(calories)) calories")
|
| 176 |
-
.font(.title2.bold())
|
| 177 |
-
}
|
| 178 |
-
} else if let error = error {
|
| 179 |
-
Text(error.localizedDescription)
|
| 180 |
-
.foregroundColor(.red)
|
| 181 |
-
}
|
| 182 |
-
}
|
| 183 |
-
.task {
|
| 184 |
-
await estimateCalories()
|
| 185 |
-
}
|
| 186 |
-
}
|
| 187 |
-
|
| 188 |
-
private func estimateCalories() async {
|
| 189 |
-
isLoading = true
|
| 190 |
-
defer { isLoading = false }
|
| 191 |
-
|
| 192 |
-
do {
|
| 193 |
-
let model = try CalorieCLIP()
|
| 194 |
-
calories = try await model.estimate(from: image)
|
| 195 |
-
} catch {
|
| 196 |
-
self.error = error
|
| 197 |
-
}
|
| 198 |
-
}
|
| 199 |
-
}
|
| 200 |
-
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
kuzco/README.md
DELETED
|
@@ -1,71 +0,0 @@
|
|
| 1 |
-
# CalorieCLIP for Kuzco / iOS
|
| 2 |
-
|
| 3 |
-
Swift integration for CalorieCLIP calorie estimation.
|
| 4 |
-
|
| 5 |
-
## Setup
|
| 6 |
-
|
| 7 |
-
### 1. Export CoreML Model
|
| 8 |
-
|
| 9 |
-
```bash
|
| 10 |
-
cd HaploLLC/CalorieCLIP
|
| 11 |
-
pip install coremltools open-clip-torch
|
| 12 |
-
python export_coreml.py --output CalorieCLIP.mlpackage
|
| 13 |
-
```
|
| 14 |
-
|
| 15 |
-
### 2. Add to Xcode Project
|
| 16 |
-
|
| 17 |
-
1. Drag `CalorieCLIP.mlpackage` into your Xcode project
|
| 18 |
-
2. Add `CalorieCLIP.swift` to your project
|
| 19 |
-
3. Import and use:
|
| 20 |
-
|
| 21 |
-
```swift
|
| 22 |
-
import Foundation
|
| 23 |
-
|
| 24 |
-
// Initialize
|
| 25 |
-
let estimator = try CalorieCLIP()
|
| 26 |
-
|
| 27 |
-
// Estimate from UIImage
|
| 28 |
-
let calories = try await estimator.estimate(from: foodImage)
|
| 29 |
-
print("Estimated: \(Int(calories)) calories")
|
| 30 |
-
|
| 31 |
-
// Estimate from URL
|
| 32 |
-
let calories = try await estimator.estimate(from: imageURL)
|
| 33 |
-
|
| 34 |
-
// Batch estimation
|
| 35 |
-
let results = try await estimator.estimate(from: [img1, img2, img3])
|
| 36 |
-
```
|
| 37 |
-
|
| 38 |
-
## SwiftUI Integration
|
| 39 |
-
|
| 40 |
-
```swift
|
| 41 |
-
import SwiftUI
|
| 42 |
-
|
| 43 |
-
struct ContentView: View {
|
| 44 |
-
@State private var image: UIImage?
|
| 45 |
-
|
| 46 |
-
var body: some View {
|
| 47 |
-
VStack {
|
| 48 |
-
if let image = image {
|
| 49 |
-
CalorieEstimateView(image: image)
|
| 50 |
-
}
|
| 51 |
-
|
| 52 |
-
Button("Select Photo") {
|
| 53 |
-
// Photo picker logic
|
| 54 |
-
}
|
| 55 |
-
}
|
| 56 |
-
}
|
| 57 |
-
}
|
| 58 |
-
```
|
| 59 |
-
|
| 60 |
-
## Performance
|
| 61 |
-
|
| 62 |
-
| Metric | Value |
|
| 63 |
-
|--------|-------|
|
| 64 |
-
| MAE | 54.3 calories |
|
| 65 |
-
| Inference Time | <50ms on iPhone 14 |
|
| 66 |
-
| Model Size | ~80MB |
|
| 67 |
-
|
| 68 |
-
## Requirements
|
| 69 |
-
|
| 70 |
-
- iOS 15.0+ / macOS 12.0+
|
| 71 |
-
- Xcode 14+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|