YuC13600 committed on
Commit
36a6756
·
verified ·
1 Parent(s): f187f17

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.pt filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+
6
+ # Virtual environments
7
+ .venv/
8
+
9
+ # Benchmark data and scripts (not for HF)
10
+ 2022sample/
11
+ 2023sample/
12
+ benchmark.py
13
+ embed_config.py
14
+ results_*.json
15
+
16
+ # Extracted features
17
+ features/
18
+ *.h5
19
+
20
+ # IDE
21
+ .idea/
22
+ .vscode/
23
+ *.swp
24
+ *.swo
25
+
26
+ # OS
27
+ .DS_Store
28
+ Thumbs.db
README(zh-tw).md ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ tags:
4
+ - coral-reef
5
+ - re-identification
6
+ - metric-learning
7
+ - dinov2
8
+ - dinov3
9
+ - pytorch
10
+ datasets:
11
+ - custom
12
+ pipeline_tag: image-feature-extraction
13
+ ---
14
+
15
+ # 珊瑚個體辨識模型
16
+
17
+ 針對水下珊瑚個體跨年辨識的微調模型。
18
+
19
+ 本專案包含兩個最佳模型與獨立推論腳本,無需依賴訓練程式碼(`coral_reid`)即可運作。
20
+
21
+ > 原始碼:[GitHub](https://github.com/YuC13600/coral_models)
22
+
23
+ ## 最佳模型
24
+
25
+ ### 最高精度 — E3-01b DINOv2 ViT-B/14
26
+
27
+ | | |
28
+ |---|---|
29
+ | **N-Benchmark Top-1** | **86.6%** (110/127) |
30
+ | Top-3 / Top-5 / Top-10 | 96.9% / 97.6% / 100.0% |
31
+ | 平均排名 / 最差排名 | 1.30 / 9 |
32
+ | Backbone | DINOv2 ViT-B/14 (86.6M 參數, timm 518×518) |
33
+ | 損失函數 | Triplet (margin=0.3) + Hard Mining |
34
+ | 取樣器 | AreaAwareSampler (area_ratio=0.75) |
35
+ | 訓練 | 4 階段漸進式解凍,56 epochs,約 7.2 小時 |
36
+ | 嵌入維度 | 1280-d,L2 正規化 |
37
+ | 檔案 | `e3_01b_dinov2_vitb_best/` |
38
+
39
+ ### 最高效率 — DINOv3 ViT-S+/16
40
+
41
+ | | |
42
+ |---|---|
43
+ | **N-Benchmark Top-1** | **81.1%** (103/127) |
44
+ | Top-3 / Top-5 / Top-10 | 92.1% / 95.3% / 99.2% |
45
+ | 平均排名 | 1.61 |
46
+ | Backbone | DINOv3 ViT-S+/16 (~22M 參數, timm 512×512) |
47
+ | 損失函數 | Triplet (margin=0.3) + Hard Mining |
48
+ | 取樣器 | MPerClassSampler (m=2) |
49
+ | 訓練 | 4 階段漸進式解凍,63 epochs,約 2.0 小時 |
50
+ | 嵌入維度 | 768-d,L2 正規化 |
51
+ | 檔案 | `dinov3_vitsplus_efficient/` |
52
+
53
+ ### 模型比較
54
+
55
+ | 指標 | 最高精度 | 最高效率 | 差距 |
56
+ |------|---------|---------|------|
57
+ | Top-1 | 86.6% | 81.1% | -5.5% |
58
+ | 參數量 | ~86.6M | ~22M | **-75%** |
59
+ | 模型大小 | 339 MB | 112 MB | **-67%** |
60
+ | 訓練時間 | ~7.2h | ~2.0h | **-72%** |
61
+ | 推論 tokens | 1369 (patch14) | 1024 (patch16) | -25% |
62
+
63
+ ## 快速開始
64
+
65
+ ```bash
66
+ # 安裝依賴(獨立環境,不需要 coral_reid)
67
+ uv sync
68
+
69
+ # 提取單張圖片特徵
70
+ uv run python extract_features.py \
71
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
72
+ --input /path/to/image.jpg
73
+
74
+ # 提取整個目錄的特徵
75
+ uv run python extract_features.py \
76
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
77
+ --input /path/to/images/ \
78
+ --output features.h5
79
+
80
+ ```
81
+
82
+ ## N-Benchmark 各區域結果
83
+
84
+ ### E3-01b DINOv2 ViT-B/14(最佳)
85
+
86
+ | 區域 | 查詢數 | Top-1 | Top-3 | Top-5 | 平均排名 |
87
+ |------|--------|-------|-------|-------|----------|
88
+ | 37 | 32 | 93.8% | 96.9% | 96.9% | 1.28 |
89
+ | 38 | 31 | 80.6% | 100.0% | 100.0% | 1.19 |
90
+ | 39 | 27 | 85.2% | 92.6% | 96.3% | 1.44 |
91
+ | 40 | 37 | 86.5% | 97.3% | 97.3% | 1.30 |
92
+ | **整體** | **127** | **86.6%** | **96.9%** | **97.6%** | **1.30** |
93
+
94
+ ### DINOv3 ViT-S+/16(高效率)
95
+
96
+ | 區域 | 查詢數 | Top-1 | Top-3 | Top-5 | 平均排名 |
97
+ |------|--------|-------|-------|-------|----------|
98
+ | 37 | 32 | 81.2% | 93.8% | 96.9% | 1.56 |
99
+ | 38 | 31 | 77.4% | 90.3% | 93.5% | 1.90 |
100
+ | 39 | 27 | 85.2% | 92.6% | 96.3% | 1.37 |
101
+ | 40 | 37 | 81.1% | 91.9% | 94.6% | 1.57 |
102
+ | **整體** | **127** | **81.1%** | **92.1%** | **95.3%** | **1.61** |
103
+
104
+ ## 完整模型歷史
105
+
106
+ ### 模型比較表
107
+
108
+ | 模型名稱 | 架構 | Backbone | 損失函數 | 挖掘方式 | 同區域負樣本 | 圖片 | 測試準確率 | 測試損失 | 驗證損失 | N-Bench 平均 | A37 | A38 | A39 | A40 | 時間 |
109
+ | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
110
+ | 預訓練 | - | DINOv2-B/14 | - | - | - | bbox | - | - | - | 29.48% | 28.12% | 35.48% | 29.63% | 24.32% | - |
111
+ | 20250812_152526 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ❌ | bbox | 92.6% | 0.1659 | - | 48.25% | 50.00% | 51.61% | 48.15% | 43.24% | ~16h |
112
+ | 20251007_133126 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ✅ | bbox | 88.8% | 0.2523 | - | 39.32% | 46.88% | 41.94% | 33.33% | 35.14% | ~16h |
113
+ | 20251008_094017 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ✅ | bbox | 90.4% | 0.1636 | - | 40.19% | 37.50% | 48.39% | 37.04% | 37.84% | ~16h |
114
+ | 20251014_183603 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ❌ | bbox | 92.8% | 0.1012 | - | 40.97% | 37.50% | 38.71% | 44.44% | 43.24% | ~16h |
115
+ | 預訓練 | - | DINOv2-B/14 | - | - | - | whole | - | - | - | 50.88% | 34.38% | 54.84% | 62.96% | 51.35% | - |
116
+ | 20251015_165008 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ✅ | whole | 92.7% | 0.1330 | 0.1006 | 64.43% | 62.50% | 61.29% | 55.56% | 78.38% | ~16h |
117
+ | 20251016_133229 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ❌ | whole | 97.9% | 0.0429 | - | 63.31% | 56.25% | 58.06% | 74.07% | 64.86% | ~16h |
118
+ | **20260308_110634** | **新** | **DINOv2-B/14 (timm 518)** | **Triplet** | **動態 (PML)** | **AreaAware 0.75** | whole | - | - | **0.1604** | **86.6%** | **93.8%** | **80.6%** | **85.2%** | **86.5%** | **~7.2h** |
119
+ | **20260306_233824** | **新** | **DINOv3-S+/16 (timm 512)** | **Triplet** | **動態 (PML)** | ❌ | whole | - | - | **0.1604** | **81.1%** | **81.2%** | **77.4%** | **85.2%** | **81.1%** | **~2.0h** |
120
+
121
+ ### 欄位說明
122
+
123
+ | 欄位 | 說明 |
124
+ | --- | --- |
125
+ | 架構 | `舊` = 舊專案實作,`新` = 重構後的模組化架構 |
126
+ | Backbone | 特徵提取器(DINOv2-B/14、DINOv3-S+/16 等) |
127
+ | 損失函數 | 損失函數類型(Triplet、ArcFace、CosFace、Circle、Contrastive 等) |
128
+ | 挖掘方式 | 樣本挖掘策略:`預組三元組` = 固定三元組,`動態 (PML)` = MPerClassSampler |
129
+ | 同區域負樣本 | 是否限制負樣本來自同一地理區域(`AreaAware 0.75` = 75% 同區域) |
130
+ | 圖片 | `bbox` = EXIF 邊界框裁切,`whole` = 完整圖片 |
131
+ | 測試準確率 | 測試集準確率(僅舊架構,衡量 pos_dist < neg_dist) |
132
+ | 驗證損失 | 訓練期間最佳驗證損失 |
133
+ | N-Bench 平均 | N-Benchmark Top-1 準確率(區域 37-40 平均) |
134
+
135
+ ### 架構差異
136
+
137
+ | 特性 | 舊架構 | 新架構 |
138
+ | --- | --- | --- |
139
+ | Dataset 輸出 | `(anchor, pos, neg)` - 3 張圖片 | `(image, label)` - 1 張圖片 |
140
+ | 三元組形成 | 訓練前預先組成 | 每批次動態挖掘 |
141
+ | 批次取樣器 | 隨機 | MPerClassSampler (m=2) |
142
+ | 損失函數 | 自訂 TripletLossWithMining | PML TripletMarginLoss |
143
+ | 每 Epoch 樣本數 | ~50,000 三元組 × 3 張圖片 | ~4,000 張圖片 |
144
+ | 訓練速度 | ~23 分鐘/epoch | ~1.5 分鐘/epoch |
145
+ | 同區域負樣本 | 已實作 | 已實作(AreaAwareSampler) |
146
+
147
+ > **N-Benchmark(最近鄰基準測試)**:在區域 37-40 中,跨 2022 與 2023 年比對珊瑚標本時,正確辨識的 Top-1 準確率。
148
+
149
+ ## 專案結構
150
+
151
+ ```
152
+ coral_models/
153
+ ├── pyproject.toml # uv 環境(獨立)
154
+ ├── extract_features.py # 特徵提取腳本
155
+ ├── e3_01b_dinov2_vitb_best/ # 最高精度模型 (86.6%)
156
+ │ ├── best_model_20260308_110634.pt
157
+ │ ├── final_model_20260308_110634.pt
158
+ │ ├── e3_01b_same_area_neg_075.yaml
159
+ │ ├── README.md
160
+ │ └── README(zh-tw).md
161
+ ├── dinov3_vitsplus_efficient/ # 最高效率模型 (81.1%)
162
+ │ ├── best_model_20260306_233824.pt
163
+ │ ├── final_model_20260306_233824.pt
164
+ │ ├── dinov3_vitsplus_tune_02_p2lr5_4ph.yaml
165
+ │ ├── README.md
166
+ │ └── README(zh-tw).md
167
+ └── legacy/ # 舊架構模型 (torch.hub, 224×224)
168
+ ├── dinov2_coral_best_model_20251015_165008.pt # 64.43%(舊最佳,同區域負樣本,完整圖片)
169
+ └── dinov2_coral_best_model_20251016_133229.pt # 63.31%(無同區域負樣本,完整圖片)
170
+ ```
171
+
172
+ ## 授權條款
173
+
174
+ 本專案採用 GPL-3.0 授權。
175
+
176
+ 基於 Meta Platforms, Inc. 的 DINOv2 與 DINOv3(Apache License 2.0)。
README.md CHANGED
@@ -1,3 +1,176 @@
1
- ---
2
- license: gpl-3.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ tags:
4
+ - coral-reef
5
+ - re-identification
6
+ - metric-learning
7
+ - dinov2
8
+ - dinov3
9
+ - pytorch
10
+ datasets:
11
+ - custom
12
+ pipeline_tag: image-feature-extraction
13
+ ---
14
+
15
+ # Coral Re-Identification Models
16
+
17
+ Fine-tuned models for underwater coral individual re-identification across multiple years.
18
+
19
+ This repository contains the two best models and standalone inference scripts. No dependency on the training codebase (`coral_reid`) is required.
20
+
21
+ > Source code: [GitHub](https://github.com/YuC13600/coral_models)
22
+
23
+ ## Best Models
24
+
25
+ ### Best Accuracy — E3-01b DINOv2 ViT-B/14
26
+
27
+ | | |
28
+ |---|---|
29
+ | **N-Benchmark Top-1** | **86.6%** (110/127) |
30
+ | Top-3 / Top-5 / Top-10 | 96.9% / 97.6% / 100.0% |
31
+ | Avg Rank / Worst Rank | 1.30 / 9 |
32
+ | Backbone | DINOv2 ViT-B/14 (86.6M params, timm 518×518) |
33
+ | Loss | Triplet (margin=0.3) + Hard Mining |
34
+ | Sampler | AreaAwareSampler (area_ratio=0.75) |
35
+ | Training | 4-phase progressive unfreezing, 56 epochs, ~7.2h |
36
+ | Embedding | 1280-d, L2-normalized |
37
+ | Files | `e3_01b_dinov2_vitb_best/` |
38
+
39
+ ### Most Efficient — DINOv3 ViT-S+/16
40
+
41
+ | | |
42
+ |---|---|
43
+ | **N-Benchmark Top-1** | **81.1%** (103/127) |
44
+ | Top-3 / Top-5 / Top-10 | 92.1% / 95.3% / 99.2% |
45
+ | Avg Rank | 1.61 |
46
+ | Backbone | DINOv3 ViT-S+/16 (~22M params, timm 512×512) |
47
+ | Loss | Triplet (margin=0.3) + Hard Mining |
48
+ | Sampler | MPerClassSampler (m=2) |
49
+ | Training | 4-phase progressive unfreezing, 63 epochs, ~2.0h |
50
+ | Embedding | 768-d, L2-normalized |
51
+ | Files | `dinov3_vitsplus_efficient/` |
52
+
53
+ ### Comparison
54
+
55
+ | Metric | Best Accuracy | Most Efficient | Difference |
56
+ |--------|--------------|----------------|------------|
57
+ | Top-1 | 86.6% | 81.1% | -5.5% |
58
+ | Parameters | ~86.6M | ~22M | **-75%** |
59
+ | Model size | 339 MB | 112 MB | **-67%** |
60
+ | Training time | ~7.2h | ~2.0h | **-72%** |
61
+ | Inference tokens | 1369 (patch14) | 1024 (patch16) | -25% |
62
+
63
+ ## Quick Start
64
+
65
+ ```bash
66
+ # Install dependencies (standalone, no coral_reid needed)
67
+ uv sync
68
+
69
+ # Extract features from a single image
70
+ uv run python extract_features.py \
71
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
72
+ --input /path/to/image.jpg
73
+
74
+ # Extract features from a directory
75
+ uv run python extract_features.py \
76
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
77
+ --input /path/to/images/ \
78
+ --output features.h5
79
+
80
+ ```
81
+
82
+ ## N-Benchmark Per-Area Results
83
+
84
+ ### E3-01b DINOv2 ViT-B/14 (Best)
85
+
86
+ | Area | Queries | Top-1 | Top-3 | Top-5 | Avg Rank |
87
+ |------|---------|-------|-------|-------|----------|
88
+ | 37 | 32 | 93.8% | 96.9% | 96.9% | 1.28 |
89
+ | 38 | 31 | 80.6% | 100.0% | 100.0% | 1.19 |
90
+ | 39 | 27 | 85.2% | 92.6% | 96.3% | 1.44 |
91
+ | 40 | 37 | 86.5% | 97.3% | 97.3% | 1.30 |
92
+ | **Overall** | **127** | **86.6%** | **96.9%** | **97.6%** | **1.30** |
93
+
94
+ ### DINOv3 ViT-S+/16 (Efficient)
95
+
96
+ | Area | Queries | Top-1 | Top-3 | Top-5 | Avg Rank |
97
+ |------|---------|-------|-------|-------|----------|
98
+ | 37 | 32 | 81.2% | 93.8% | 96.9% | 1.56 |
99
+ | 38 | 31 | 77.4% | 90.3% | 93.5% | 1.90 |
100
+ | 39 | 27 | 85.2% | 92.6% | 96.3% | 1.37 |
101
+ | 40 | 37 | 81.1% | 91.9% | 94.6% | 1.57 |
102
+ | **Overall** | **127** | **81.1%** | **92.1%** | **95.3%** | **1.61** |
103
+
104
+ ## Full Model History
105
+
106
+ ### Model Comparison Table
107
+
108
+ | Model Name | Arch | Backbone | Loss | Mining | Same Area Neg | Image | Test Acc | Test Loss | Val Loss | N-Bench Avg | A37 | A38 | A39 | A40 | Time |
109
+ | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
110
+ | Pre-trained | - | DINOv2-B/14 | - | - | - | bbox | - | - | - | 29.48% | 28.12% | 35.48% | 29.63% | 24.32% | - |
111
+ | 20250812_152526 | old | DINOv2-B/14 | Triplet | pre-composed | ❌ | bbox | 92.6% | 0.1659 | - | 48.25% | 50.00% | 51.61% | 48.15% | 43.24% | ~16h |
112
+ | 20251007_133126 | old | DINOv2-B/14 | Triplet | pre-composed | ✅ | bbox | 88.8% | 0.2523 | - | 39.32% | 46.88% | 41.94% | 33.33% | 35.14% | ~16h |
113
+ | 20251008_094017 | old | DINOv2-B/14 | Triplet | pre-composed | ✅ | bbox | 90.4% | 0.1636 | - | 40.19% | 37.50% | 48.39% | 37.04% | 37.84% | ~16h |
114
+ | 20251014_183603 | old | DINOv2-B/14 | Triplet | pre-composed | ❌ | bbox | 92.8% | 0.1012 | - | 40.97% | 37.50% | 38.71% | 44.44% | 43.24% | ~16h |
115
+ | Pre-trained | - | DINOv2-B/14 | - | - | - | whole | - | - | - | 50.88% | 34.38% | 54.84% | 62.96% | 51.35% | - |
116
+ | 20251015_165008 | old | DINOv2-B/14 | Triplet | pre-composed | ✅ | whole | 92.7% | 0.1330 | 0.1006 | 64.43% | 62.50% | 61.29% | 55.56% | 78.38% | ~16h |
117
+ | 20251016_133229 | old | DINOv2-B/14 | Triplet | pre-composed | ❌ | whole | 97.9% | 0.0429 | - | 63.31% | 56.25% | 58.06% | 74.07% | 64.86% | ~16h |
118
+ | **20260308_110634** | **new** | **DINOv2-B/14 (timm 518)** | **Triplet** | **dynamic (PML)** | **AreaAware 0.75** | whole | - | - | **0.1604** | **86.6%** | **93.8%** | **80.6%** | **85.2%** | **86.5%** | **~7.2h** |
119
+ | **20260306_233824** | **new** | **DINOv3-S+/16 (timm 512)** | **Triplet** | **dynamic (PML)** | ❌ | whole | - | - | **0.1604** | **81.1%** | **81.2%** | **77.4%** | **85.2%** | **81.1%** | **~2.0h** |
120
+
121
+ ### Column Descriptions
122
+
123
+ | Column | Description |
124
+ | --- | --- |
125
+ | Arch | `old` = old_repo implementation, `new` = refactored modular architecture |
126
+ | Backbone | Feature extractor (DINOv2-B/14, DINOv3-S+/16, etc.) |
127
+ | Loss | Loss function (Triplet, ArcFace, CosFace, Circle, Contrastive, etc.) |
128
+ | Mining | Sample mining: `pre-composed` = fixed triplets, `dynamic (PML)` = MPerClassSampler |
129
+ | Same Area Neg | Whether negatives restricted to same geographic area (`AreaAware 0.75` = 75% same area) |
130
+ | Image | `bbox` = EXIF bounding box crop, `whole` = full image |
131
+ | Test Acc | Test set accuracy (old arch only, measures pos_dist < neg_dist) |
132
+ | Val Loss | Best validation loss during training |
133
+ | N-Bench Avg | N-Benchmark Top-1 accuracy averaged across areas 37-40 |
134
+
135
+ ### Architecture Differences
136
+
137
+ | Feature | Old Architecture | New Architecture |
138
+ | --- | --- | --- |
139
+ | Dataset Output | `(anchor, pos, neg)` - 3 images | `(image, label)` - 1 image |
140
+ | Triplet Formation | Pre-composed before training | Dynamic mining per batch |
141
+ | Batch Sampler | Random | MPerClassSampler (m=2) |
142
+ | Loss Function | Custom TripletLossWithMining | PML TripletMarginLoss |
143
+ | Samples per Epoch | ~50,000 triplets x 3 images | ~4,000 images |
144
+ | Training Speed | ~23 min/epoch | ~1.5 min/epoch |
145
+ | Same Area Negatives | Implemented | Implemented (AreaAwareSampler) |
146
+
147
+ > **N-Benchmark (Nearest Benchmark)**: Top-1 accuracy in identifying the correct coral individual when matching specimens in areas 37-40 across 2022 and 2023.
148
+
149
+ ## Project Structure
150
+
151
+ ```
152
+ coral_models/
153
+ ├── pyproject.toml # uv environment (standalone)
154
+ ├── extract_features.py # Feature extraction script
155
+ ├── e3_01b_dinov2_vitb_best/ # Best accuracy model (86.6%)
156
+ │ ├── best_model_20260308_110634.pt
157
+ │ ├── final_model_20260308_110634.pt
158
+ │ ├── e3_01b_same_area_neg_075.yaml
159
+ │ ├── README.md
160
+ │ └── README(zh-tw).md
161
+ ├── dinov3_vitsplus_efficient/ # Most efficient model (81.1%)
162
+ │ ├── best_model_20260306_233824.pt
163
+ │ ├── final_model_20260306_233824.pt
164
+ │ ├── dinov3_vitsplus_tune_02_p2lr5_4ph.yaml
165
+ │ ├── README.md
166
+ │ └── README(zh-tw).md
167
+ └── legacy/ # Old architecture models (torch.hub, 224×224)
168
+ ├── dinov2_coral_best_model_20251015_165008.pt # 64.43% (old best, same area neg, whole image)
169
+ └── dinov2_coral_best_model_20251016_133229.pt # 63.31% (no same area neg, whole image)
170
+ ```
171
+
172
+ ## License
173
+
174
+ This project is licensed under GPL-3.0.
175
+
176
+ Based on DINOv2 and DINOv3 by Meta Platforms, Inc. (Apache License 2.0).
dinov3_vitsplus_efficient/README(zh-tw).md ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ tags:
4
+ - coral-reef
5
+ - re-identification
6
+ - metric-learning
7
+ - dinov3
8
+ - pytorch
9
+ datasets:
10
+ - custom
11
+ pipeline_tag: image-feature-extraction
12
+ ---
13
+
14
+ # 珊瑚個體辨識:DINOv3 ViT-S+/16(高效率)
15
+
16
+ 針對水下珊瑚個體辨識微調的 DINOv3 ViT-S+/16 模型。此為本專案中的**最高效率模型**,僅以約 22M 參數和約 2 小時訓練時間,達到 **81.1% N-Benchmark Top-1 準確率**。
17
+
18
+ ## 模型規格
19
+
20
+ | | |
21
+ |---|---|
22
+ | **架構** | DINOv3 ViT-S+/16 (~22M 參數) |
23
+ | **Backbone 載入方式** | timm (`vit_small_plus_patch16_dinov3`) |
24
+ | **輸入尺寸** | 512 x 512 |
25
+ | **嵌入維度** | 768 |
26
+ | **Backbone 輸出維度** | 384 |
27
+ | **Head** | MLP (384 → 512 → 768, BatchNorm, Dropout 0.3) |
28
+
29
+ ## 訓練配置
30
+
31
+ | | |
32
+ |---|---|
33
+ | **損失函數** | Triplet Loss (margin=0.3) + Hard Mining |
34
+ | **取樣器** | MPerClassSampler (m=2) |
35
+ | **批次大小** | 16(累積步數:8,等效批次:128) |
36
+ | **優化器** | AdamW (weight_decay=1e-4) |
37
+ | **梯度裁剪** | 1.0 |
38
+ | **Early stopping** | patience=6, delta=0.0005 |
39
+ | **總 epochs** | 63 |
40
+ | **訓練時間** | 約 2.0 小時(單 GPU) |
41
+
42
+ ### 漸進式解凍(4 階段)
43
+
44
+ | 階段 | 解凍層數 | 學習率 | 最大 Epochs |
45
+ |------|----------|--------|-------------|
46
+ | 1 — 僅 Head | 0(僅 head) | 3e-4 | 20 |
47
+ | 2 — 最後 2 blocks | 2 | 5e-5 | 20 |
48
+ | 3 — 最後 4 blocks | 4 | 1.5e-5 | 15 |
49
+ | 4 — 最後 6 blocks | 6 | 1e-5 | 15 |
50
+
51
+ Phase 2 學習率從預設的 8e-5 降至 5e-5,避免 early stopping 過早觸發,讓 Phase 3 有更好的起點。Phase 4 進一步釋放模型容量。
52
+
53
+ ## 評估結果(N-Benchmark)
54
+
55
+ 跨年匹配:2022(參考集)vs 2023(查詢集),區域 37-40。
56
+
57
+ | 區域 | 查詢數 | Top-1 | Top-3 | Top-5 | 平均排名 |
58
+ |------|--------|-------|-------|-------|----------|
59
+ | 37 | 32 | 81.2% | 93.8% | 96.9% | 1.56 |
60
+ | 38 | 31 | 77.4% | 90.3% | 93.5% | 1.90 |
61
+ | 39 | 27 | 85.2% | 92.6% | 96.3% | 1.37 |
62
+ | 40 | 37 | 81.1% | 91.9% | 94.6% | 1.57 |
63
+ | **整體** | **127** | **81.1%** | **92.1%** | **95.3%** | **1.61** |
64
+
65
+ - **驗證損失**:0.1604
66
+
67
+ ## 與最強模型的比較
68
+
69
+ | 指標 | 最強模型 (DINOv2 ViT-B) | 本模型 | 差距 |
70
+ |------|------------------------|--------|------|
71
+ | Top-1 | 86.6% | 81.1% | -5.5% |
72
+ | 參數量 | ~86.6M | ~22M | **-75%** |
73
+ | 訓練時間 | ~7.2h | ~2.0h | **-72%** |
74
+ | 模型檔案大小 | 339 MB | 112 MB | **-67%** |
75
+ | 推論 tokens | 1369 (patch14) | 1024 (patch16) | -25% |
76
+
77
+ ## 檔案說明
78
+
79
+ | 檔案 | 說明 |
80
+ |------|------|
81
+ | `best_model_20260306_233824.pt` | 最佳 checkpoint(訓練期間最低驗證損失) |
82
+ | `final_model_20260306_233824.pt` | 最終 checkpoint(最後一個 epoch) |
83
+ | `dinov3_vitsplus_tune_02_p2lr5_4ph.yaml` | 完整訓練配置 |
84
+
85
+ ## 使用方式
86
+
87
+ ```python
88
+ import torch
89
+ from coral_reid.config import ExperimentConfig
90
+ from coral_reid.models.coral_model import CoralReIDModel
91
+
92
+ config = ExperimentConfig.from_yaml("dinov3_vitsplus_tune_02_p2lr5_4ph.yaml")
93
+ model = CoralReIDModel.from_config(config.backbone, config.head)
94
+ model.load("best_model_20260306_233824.pt", map_location="cpu")
95
+ model.eval()
96
+
97
+ # 提取嵌入向量
98
+ embedding = model(image_tensor) # (1, 768)
99
+ ```
100
+
101
+ 或使用獨立腳本(不需要 `coral_reid`):
102
+
103
+ ```bash
104
+ uv run python extract_features.py \
105
+ --model dinov3_vitsplus_efficient/best_model_20260306_233824.pt \
106
+ --input /path/to/image.jpg
107
+ ```
108
+
109
+ ## 引用
110
+
111
+ 本模型為珊瑚個體辨識研究的一部分,用於小琉球、綠島及東北角珊瑚礁的長期生態監測。
dinov3_vitsplus_efficient/README.md ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ tags:
4
+ - coral-reef
5
+ - re-identification
6
+ - metric-learning
7
+ - dinov3
8
+ - pytorch
9
+ datasets:
10
+ - custom
11
+ pipeline_tag: image-feature-extraction
12
+ ---
13
+
14
+ # Coral Re-ID: DINOv3 ViT-S+/16 (Efficient)
15
+
16
+ Fine-tuned DINOv3 ViT-S+/16 for underwater coral individual re-identification. This is the **most efficient model** in the project, achieving **81.1% N-Benchmark Top-1 accuracy** with only ~22M parameters and ~2h training time.
17
+
18
+ ## Model Details
19
+
20
+ | | |
21
+ |---|---|
22
+ | **Architecture** | DINOv3 ViT-S+/16 (~22M params) |
23
+ | **Backbone loader** | timm (`vit_small_plus_patch16_dinov3`) |
24
+ | **Input size** | 512 x 512 |
25
+ | **Embedding dim** | 768 |
26
+ | **Backbone output dim** | 384 |
27
+ | **Head** | MLP (384 → 512 → 768, BatchNorm, Dropout 0.3) |
28
+
29
+ ## Training Configuration
30
+
31
+ | | |
32
+ |---|---|
33
+ | **Loss** | Triplet Loss (margin=0.3) + Hard Mining |
34
+ | **Sampler** | MPerClassSampler (m=2) |
35
+ | **Batch size** | 16 (accumulation steps: 8, effective batch: 128) |
36
+ | **Optimizer** | AdamW (weight_decay=1e-4) |
37
+ | **Gradient clipping** | 1.0 |
38
+ | **Early stopping** | patience=6, delta=0.0005 |
39
+ | **Total epochs** | 63 |
40
+ | **Training time** | ~2.0 hours (single GPU) |
41
+
42
+ ### Progressive Unfreezing (4-phase)
43
+
44
+ | Phase | Layers | LR | Max Epochs |
45
+ |-------|--------|----|------------|
46
+ | 1 — Head only | 0 (head only) | 3e-4 | 20 |
47
+ | 2 — Last 2 blocks | 2 | 5e-5 | 20 |
48
+ | 3 — Last 4 blocks | 4 | 1.5e-5 | 15 |
49
+ | 4 — Last 6 blocks | 6 | 1e-5 | 15 |
50
+
51
+ Phase 2 LR was reduced from the default 8e-5 to 5e-5 to prevent early stopping from triggering too soon, giving Phase 3 a better starting point. Phase 4 then further unlocks the model's capacity.
52
+
53
+ ## Evaluation Results (N-Benchmark)
54
+
55
+ Cross-year matching: 2022 (reference) vs 2023 (query), areas 37-40.
56
+
57
+ | Area | Queries | Top-1 | Top-3 | Top-5 | Avg Rank |
58
+ |------|---------|-------|-------|-------|----------|
59
+ | 37 | 32 | 81.2% | 93.8% | 96.9% | 1.56 |
60
+ | 38 | 31 | 77.4% | 90.3% | 93.5% | 1.90 |
61
+ | 39 | 27 | 85.2% | 92.6% | 96.3% | 1.37 |
62
+ | 40 | 37 | 81.1% | 91.9% | 94.6% | 1.57 |
63
+ | **Overall** | **127** | **81.1%** | **92.1%** | **95.3%** | **1.61** |
64
+
65
+ - **Val loss**: 0.1604
66
+
67
+ ## Comparison with Best Model
68
+
69
+ | Metric | Best (DINOv2 ViT-B) | This model | Difference |
70
+ |--------|---------------------|------------|------------|
71
+ | Top-1 | 86.6% | 81.1% | -5.5% |
72
+ | Parameters | ~86.6M | ~22M | **-75%** |
73
+ | Training time | ~7.2h | ~2.0h | **-72%** |
74
+ | Model file size | 339 MB | 112 MB | **-67%** |
75
+ | Inference tokens | 1369 (patch14) | 1024 (patch16) | -25% |
76
+
77
+ ## Files
78
+
79
+ | File | Description |
80
+ |------|-------------|
81
+ | `best_model_20260306_233824.pt` | Best checkpoint (lowest val loss during training) |
82
+ | `final_model_20260306_233824.pt` | Final checkpoint (last epoch) |
83
+ | `dinov3_vitsplus_tune_02_p2lr5_4ph.yaml` | Full training config |
84
+
85
+ ## Usage
86
+
87
+ ```python
88
+ import torch
89
+ from coral_reid.config import ExperimentConfig
90
+ from coral_reid.models.coral_model import CoralReIDModel
91
+
92
+ config = ExperimentConfig.from_yaml("dinov3_vitsplus_tune_02_p2lr5_4ph.yaml")
93
+ model = CoralReIDModel.from_config(config.backbone, config.head)
94
+ model.load("best_model_20260306_233824.pt", map_location="cpu")
95
+ model.eval()
96
+
97
+ # Extract embedding
98
+ embedding = model(image_tensor) # (1, 768)
99
+ ```
100
+
101
+ Or with the standalone script (no `coral_reid` dependency):
102
+
103
+ ```bash
104
+ uv run python extract_features.py \
105
+ --model dinov3_vitsplus_efficient/best_model_20260306_233824.pt \
106
+ --input /path/to/image.jpg
107
+ ```
108
+
109
+ ## Citation
110
+
111
+ Part of the coral re-identification research for long-term ecological monitoring at Xiaoliuqiu, Green Island, and Northeastern Taiwan.
dinov3_vitsplus_efficient/best_model_20260306_233824.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4f525e9298f0d7b773e4736ad15bffbbfce88f5f5431b7ca4062cc55beed51f
3
+ size 117200249
dinov3_vitsplus_efficient/dinov3_vitsplus_tune_02_p2lr5_4ph.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DINOv3 ViT-S+ Triplet 調參 Tune-02
2
+ # 問題:同 Tune-01,Phase 2 早收斂 + Phase 3 高基線,且尚未嘗試深化解凍
3
+ # 策略:Tune-01 基礎上新增 Phase 4(6 blocks,LR=1e-5,15ep)
4
+ # 測試在 Phase 2/3 改善後,Phase 4 是否能進一步提升
5
+ # 基準(E1-13):DINOv3 ViT-S+ Triplet 74.0%
6
+ name: dinov3_vitsplus_tune_02_p2lr5_4ph
7
+ seed: 42
8
+ device: cuda
9
+ output_dir: outputs
10
+
11
+ backbone:
12
+ name: timm
13
+ variant: vit_small_plus_patch16_dinov3
14
+ pretrained: true
15
+ freeze: true
16
+ output_dim: 384
17
+ img_size: 512
18
+
19
+ head:
20
+ name: mlp
21
+ input_dim: 384
22
+ hidden_dim: 512
23
+ output_dim: 768
24
+ dropout: 0.3
25
+ use_batchnorm: true
26
+
27
+ loss:
28
+ name: triplet
29
+ margin: 0.3
30
+ mining_strategy: hard
31
+
32
+ data:
33
+ root_dirs:
34
+ - /home/yuc/code/data/coral
35
+ use_whole_image: true
36
+ same_area_negatives: false
37
+ image_size: 512
38
+ train_ratio: 0.7
39
+ val_ratio: 0.15
40
+ test_ratio: 0.15
41
+ num_workers: 4
42
+
43
+ training:
44
+ batch_size: 16
45
+ accumulation_steps: 8
46
+ learning_rate: 0.0003
47
+ weight_decay: 0.0001
48
+ early_stopping_patience: 6
49
+ early_stopping_delta: 0.0005
50
+ scheduler_patience: 3
51
+ scheduler_factor: 0.5
52
+ min_lr: 1.0e-06
53
+ gradient_clip_norm: 1.0
54
+
55
+ phases:
56
+ - name: 'Phase 1: Head Only'
57
+ epochs: 20
58
+ learning_rate: 3.0e-04
59
+ unfreeze_backbone: false
60
+ unfreeze_layers: 0
61
+ - name: 'Phase 2: Head + Last 2 Blocks (LR=5e-5, 20ep)'
62
+ epochs: 20
63
+ learning_rate: 5.0e-05
64
+ unfreeze_backbone: true
65
+ unfreeze_layers: 2
66
+ - name: 'Phase 3: Head + Last 4 Blocks (LR=1.5e-5)'
67
+ epochs: 15
68
+ learning_rate: 1.5e-05
69
+ unfreeze_backbone: true
70
+ unfreeze_layers: 4
71
+ - name: 'Phase 4: Head + Last 6 Blocks (LR=1e-5)'
72
+ epochs: 15
73
+ learning_rate: 1.0e-05
74
+ unfreeze_backbone: true
75
+ unfreeze_layers: 6
dinov3_vitsplus_efficient/final_model_20260306_233824.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58150a259af088911b9e3a03ad50b3feb82a9ee57ef331adc23df61f21f3d3df
3
+ size 117200455
e3_01b_dinov2_vitb_best/README(zh-tw).md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ tags:
4
+ - coral-reef
5
+ - re-identification
6
+ - metric-learning
7
+ - dinov2
8
+ - pytorch
9
+ datasets:
10
+ - custom
11
+ pipeline_tag: image-feature-extraction
12
+ ---
13
+
14
+ # 珊瑚個體辨識:DINOv2 ViT-B/14(最佳精度)
15
+
16
+ 針對水下珊瑚個體辨識微調的 DINOv2 ViT-B/14 模型。此為本專案中的**最強模型**,達到 **86.6% N-Benchmark Top-1 準確率**。
17
+
18
+ ## 模型規格
19
+
20
+ | | |
21
+ |---|---|
22
+ | **架構** | DINOv2 ViT-B/14 (86.6M 參數) |
23
+ | **Backbone 載入方式** | timm (`vit_base_patch14_dinov2`) |
24
+ | **輸入尺寸** | 518 x 518 |
25
+ | **嵌入維度** | 1280 |
26
+ | **Backbone 輸出維度** | 768 |
27
+ | **Head** | MLP (768 → 1024 → 1280, BatchNorm, Dropout 0.3) |
28
+
29
+ ## 訓練配置
30
+
31
+ | | |
32
+ |---|---|
33
+ | **損失函數** | Triplet Loss (margin=0.3) + Hard Mining |
34
+ | **取樣器** | AreaAwareSampler (area_ratio=0.75) |
35
+ | **批次大小** | 16(累積步數:8,等效批次:128) |
36
+ | **優化器** | AdamW (weight_decay=1e-4) |
37
+ | **梯度裁剪** | 1.0 |
38
+ | **Early stopping** | patience=6, delta=0.0005 |
39
+ | **總 epochs** | 56 |
40
+ | **訓練時間** | 約 7.2 小時(單 GPU) |
41
+
42
+ ### 漸進式解凍(4 階段)
43
+
44
+ | 階段 | 解凍層數 | 學習率 | 最大 Epochs |
45
+ |------|----------|--------|-------------|
46
+ | 1 — 僅 Head | 0(僅 head) | 3e-4 | 20 |
47
+ | 2 — 最後 2 blocks | 2 | 8e-5 | 15 |
48
+ | 3 — 最後 4 blocks | 4 | 3e-5 | 12 |
49
+ | 4 — 最後 6 blocks | 6 | 1e-5 | 15 |
50
+
51
+ ### AreaAwareSampler
52
+
53
+ 每個訓練批次由 75% 同區域珊瑚與 25% 跨區域珊瑚組成。此設計對齊 N-Benchmark 的評估方式(區域內匹配),提供來自同一珊瑚礁區域中視覺上相似的更困難負樣本。
54
+
55
+ ## 評估結果(N-Benchmark)
56
+
57
+ 跨年匹配:2022(參考集)vs 2023(查詢集),區域 37-40。
58
+
59
+ | 區域 | 查詢數 | Top-1 | Top-3 | Top-5 | 平均排名 |
60
+ |------|--------|-------|-------|-------|----------|
61
+ | 37 | 32 | 93.8% | 96.9% | 96.9% | 1.28 |
62
+ | 38 | 31 | 80.6% | 100.0% | 100.0% | 1.19 |
63
+ | 39 | 27 | 85.2% | 92.6% | 96.3% | 1.44 |
64
+ | 40 | 37 | 86.5% | 97.3% | 97.3% | 1.30 |
65
+ | **整體** | **127** | **86.6%** | **96.9%** | **97.6%** | **1.30** |
66
+
67
+ - **最差排名**:9(所有正確匹配均在前 9 名內)
68
+ - **驗證損失**:0.1604
69
+
70
+ ## 檔案說明
71
+
72
+ | 檔案 | 說明 |
73
+ |------|------|
74
+ | `best_model_20260308_110634.pt` | 最佳 checkpoint(訓練期間最低驗證損失) |
75
+ | `final_model_20260308_110634.pt` | 最終 checkpoint(最後一個 epoch) |
76
+ | `e3_01b_same_area_neg_075.yaml` | 完整訓練配置 |
77
+
78
+ ## 使用方式
79
+
80
+ ```python
81
+ import torch
82
+ from coral_reid.config import ExperimentConfig
83
+ from coral_reid.models.coral_model import CoralReIDModel
84
+
85
+ config = ExperimentConfig.from_yaml("e3_01b_same_area_neg_075.yaml")
86
+ model = CoralReIDModel.from_config(config.backbone, config.head)
87
+ model.load("best_model_20260308_110634.pt", map_location="cpu")
88
+ model.eval()
89
+
90
+ # 提取嵌入向量
91
+ embedding = model(image_tensor) # (1, 1280)
92
+ ```
93
+
94
+ 或使用獨立腳本(不需要 `coral_reid`):
95
+
96
+ ```bash
97
+ uv run python extract_features.py \
98
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
99
+ --input /path/to/image.jpg
100
+ ```
101
+
102
+ ## 引用
103
+
104
+ 本模型為珊瑚個體辨識研究的一部分,用於小琉球、綠島及東北角珊瑚礁的長期生態監測。
e3_01b_dinov2_vitb_best/README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ tags:
4
+ - coral-reef
5
+ - re-identification
6
+ - metric-learning
7
+ - dinov2
8
+ - pytorch
9
+ datasets:
10
+ - custom
11
+ pipeline_tag: image-feature-extraction
12
+ ---
13
+
14
+ # Coral Re-ID: DINOv2 ViT-B/14 (Best Accuracy)
15
+
16
+ Fine-tuned DINOv2 ViT-B/14 for underwater coral individual re-identification. This is the **strongest model** in the project, achieving **86.6% N-Benchmark Top-1 accuracy**.
17
+
18
+ ## Model Details
19
+
20
+ | | |
21
+ |---|---|
22
+ | **Architecture** | DINOv2 ViT-B/14 (86.6M params) |
23
+ | **Backbone loader** | timm (`vit_base_patch14_dinov2`) |
24
+ | **Input size** | 518 x 518 |
25
+ | **Embedding dim** | 1280 |
26
+ | **Backbone output dim** | 768 |
27
+ | **Head** | MLP (768 → 1024 → 1280, BatchNorm, Dropout 0.3) |
28
+
29
+ ## Training Configuration
30
+
31
+ | | |
32
+ |---|---|
33
+ | **Loss** | Triplet Loss (margin=0.3) + Hard Mining |
34
+ | **Sampler** | AreaAwareSampler (area_ratio=0.75) |
35
+ | **Batch size** | 16 (accumulation steps: 8, effective batch: 128) |
36
+ | **Optimizer** | AdamW (weight_decay=1e-4) |
37
+ | **Gradient clipping** | 1.0 |
38
+ | **Early stopping** | patience=6, delta=0.0005 |
39
+ | **Total epochs** | 56 |
40
+ | **Training time** | ~7.2 hours (single GPU) |
41
+
42
+ ### Progressive Unfreezing (4-phase)
43
+
44
+ | Phase | Layers | LR | Max Epochs |
45
+ |-------|--------|----|------------|
46
+ | 1 — Head only | 0 (head only) | 3e-4 | 20 |
47
+ | 2 — Last 2 blocks | 2 | 8e-5 | 15 |
48
+ | 3 — Last 4 blocks | 4 | 3e-5 | 12 |
49
+ | 4 — Last 6 blocks | 6 | 1e-5 | 15 |
50
+
51
+ ### AreaAwareSampler
52
+
53
+ Each training batch is composed of 75% same-area corals and 25% cross-area corals. This aligns training distribution with the N-Benchmark evaluation protocol (within-area matching), providing harder negative examples from visually similar corals in the same reef area.
54
+
55
+ ## Evaluation Results (N-Benchmark)
56
+
57
+ Cross-year matching: 2022 (reference) vs 2023 (query), areas 37-40.
58
+
59
+ | Area | Queries | Top-1 | Top-3 | Top-5 | Avg Rank |
60
+ |------|---------|-------|-------|-------|----------|
61
+ | 37 | 32 | 93.8% | 96.9% | 96.9% | 1.28 |
62
+ | 38 | 31 | 80.6% | 100.0% | 100.0% | 1.19 |
63
+ | 39 | 27 | 85.2% | 92.6% | 96.3% | 1.44 |
64
+ | 40 | 37 | 86.5% | 97.3% | 97.3% | 1.30 |
65
+ | **Overall** | **127** | **86.6%** | **96.9%** | **97.6%** | **1.30** |
66
+
67
+ - **Worst rank**: 9 (all correct matches within top 9)
68
+ - **Val loss**: 0.1604
69
+
70
+ ## Files
71
+
72
+ | File | Description |
73
+ |------|-------------|
74
+ | `best_model_20260308_110634.pt` | Best checkpoint (lowest val loss during training) |
75
+ | `final_model_20260308_110634.pt` | Final checkpoint (last epoch) |
76
+ | `e3_01b_same_area_neg_075.yaml` | Full training config |
77
+
78
+ ## Usage
79
+
80
+ ```python
81
+ import torch
82
+ from coral_reid.config import ExperimentConfig
83
+ from coral_reid.models.coral_model import CoralReIDModel
84
+
85
+ config = ExperimentConfig.from_yaml("e3_01b_same_area_neg_075.yaml")
86
+ model = CoralReIDModel.from_config(config.backbone, config.head)
87
+ model.load("best_model_20260308_110634.pt", map_location="cpu")
88
+ model.eval()
89
+
90
+ # Extract embedding
91
+ embedding = model(image_tensor) # (1, 1280)
92
+ ```
93
+
94
+ Or with the standalone script (no `coral_reid` dependency):
95
+
96
+ ```bash
97
+ uv run python extract_features.py \
98
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
99
+ --input /path/to/image.jpg
100
+ ```
101
+
102
+ ## Citation
103
+
104
+ Part of the coral re-identification research for long-term ecological monitoring at Xiaoliuqiu, Green Island, and Northeastern Taiwan.
e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b77d102cabc4611df9227f4fc1be9ddeb134459265259f0783ff7f28a8323cc0
3
+ size 354830189
e3_01b_dinov2_vitb_best/e3_01b_same_area_neg_075.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # E3-01b: Same Area Negatives (area_ratio=0.75)
2
+ # 基準:dinov2_vitb_tune_02_4ph (84.3%)
3
+ # 策略:AreaAwareSampler,每 batch 75% 同區域 + 25% 跨區域(更強 hard negatives)
4
+ name: e3_01b_same_area_neg_075
5
+ seed: 42
6
+ device: cuda
7
+ output_dir: outputs
8
+
9
+ backbone:
10
+ name: timm
11
+ variant: vit_base_patch14_dinov2
12
+ pretrained: true
13
+ freeze: true
14
+ output_dim: 768
15
+ img_size: 518
16
+
17
+ head:
18
+ name: mlp
19
+ input_dim: 768
20
+ hidden_dim: 1024
21
+ output_dim: 1280
22
+ dropout: 0.3
23
+ use_batchnorm: true
24
+
25
+ loss:
26
+ name: triplet
27
+ margin: 0.3
28
+ mining_strategy: hard
29
+
30
+ data:
31
+ root_dirs:
32
+ - /home/yuc/code/data/coral
33
+ use_whole_image: true
34
+ same_area_negatives: true
35
+ area_ratio: 0.75
36
+ image_size: 518
37
+ train_ratio: 0.7
38
+ val_ratio: 0.15
39
+ test_ratio: 0.15
40
+ num_workers: 4
41
+
42
+ training:
43
+ batch_size: 16
44
+ accumulation_steps: 8
45
+ learning_rate: 0.0003
46
+ weight_decay: 0.0001
47
+ early_stopping_patience: 6
48
+ early_stopping_delta: 0.0005
49
+ scheduler_patience: 3
50
+ scheduler_factor: 0.5
51
+ min_lr: 1.0e-06
52
+ gradient_clip_norm: 1.0
53
+
54
+ phases:
55
+ - name: 'Phase 1: Head Only'
56
+ epochs: 20
57
+ learning_rate: 3.0e-04
58
+ unfreeze_backbone: false
59
+ unfreeze_layers: 0
60
+ - name: 'Phase 2: Head + Last 2 Blocks'
61
+ epochs: 15
62
+ learning_rate: 8.0e-05
63
+ unfreeze_backbone: true
64
+ unfreeze_layers: 2
65
+ - name: 'Phase 3: Head + Last 4 Blocks'
66
+ epochs: 12
67
+ learning_rate: 3.0e-05
68
+ unfreeze_backbone: true
69
+ unfreeze_layers: 4
70
+ - name: 'Phase 4: Head + Last 6 Blocks (LR=1e-5)'
71
+ epochs: 15
72
+ learning_rate: 1.0e-05
73
+ unfreeze_backbone: true
74
+ unfreeze_layers: 6
e3_01b_dinov2_vitb_best/final_model_20260308_110634.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6537022312141ca144fdd3c032826a4cbcb3a0117047df210e1bc07838d5996
3
+ size 354830383
extract_features.py ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Standalone feature extraction for coral re-identification models.
2
+
3
+ Reconstructs the model architecture from checkpoint metadata (or a YAML config
4
+ as fallback) and loads weights without depending on the coral_reid package.
5
+
6
+ Usage:
7
+ # Extract features from a directory of images
8
+ uv run python extract_features.py \
9
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
10
+ --input /path/to/images \
11
+ --output features.h5
12
+
13
+ # Extract features for N-Benchmark (by area)
14
+ uv run python extract_features.py \
15
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
16
+ --input /path/to/2022sample \
17
+ --areas 37 38 39 40 \
18
+ --output features/
19
+
20
+ # Single image embedding (prints to stdout)
21
+ uv run python extract_features.py \
22
+ --model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
23
+ --input /path/to/single_image.jpg
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import argparse
29
+ import logging
30
+ import os
31
+ from dataclasses import dataclass
32
+ from pathlib import Path
33
+
34
+ import h5py
35
+ import numpy as np
36
+ import timm
37
+ import torch
38
+ import torch.nn as nn
39
+ import torch.nn.functional as F
40
+ import yaml
41
+ from PIL import Image
42
+ from torchvision import transforms
43
+ from tqdm import tqdm
44
+
45
+ logging.basicConfig(
46
+ level=logging.INFO,
47
+ format="%(asctime)s [%(levelname)s] %(message)s",
48
+ )
49
+ logger = logging.getLogger(__name__)
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Configuration
54
+ # ---------------------------------------------------------------------------
55
+
56
+
57
@dataclass
class ModelConfig:
    """Model configuration parsed from YAML or checkpoint metadata."""

    # Backbone settings
    backbone_variant: str
    img_size: int
    backbone_output_dim: int

    # Projection-head settings
    hidden_dim: int
    output_dim: int
    dropout: float
    use_batchnorm: bool

    @classmethod
    def from_dict(cls, d: dict) -> ModelConfig:
        """Create config from a dict (embedded in checkpoint)."""
        kwargs = {
            "backbone_variant": d["backbone_variant"],
            "img_size": d.get("img_size", 224),
            "backbone_output_dim": d["backbone_output_dim"],
            "hidden_dim": d["hidden_dim"],
            "output_dim": d["output_dim"],
            "dropout": d.get("dropout", 0.3),
            "use_batchnorm": d.get("use_batchnorm", True),
        }
        return cls(**kwargs)

    @classmethod
    def from_yaml(cls, path: str | Path) -> ModelConfig:
        """Create config from a full training YAML (backbone/head sections)."""
        with open(path) as fh:
            raw = yaml.safe_load(fh)

        bb = raw["backbone"]
        hd = raw["head"]

        return cls(
            backbone_variant=bb["variant"],
            img_size=bb.get("img_size", 224),
            backbone_output_dim=bb["output_dim"],
            hidden_dim=hd["hidden_dim"],
            output_dim=hd["output_dim"],
            dropout=hd.get("dropout", 0.3),
            use_batchnorm=hd.get("use_batchnorm", True),
        )
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # Model Architecture (standalone reconstruction)
106
+ # ---------------------------------------------------------------------------
107
+
108
+
109
class MLPHead(nn.Module):
    """MLP projection head that emits L2-normalized embeddings.

    Pipeline:
        BatchNorm1d -> Dropout(0.2)
        -> Linear -> ReLU -> Dropout -> Linear -> [BatchNorm1d]
        -> L2 Normalize
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        dropout: float = 0.3,
        use_batchnorm: bool = True,
    ) -> None:
        super().__init__()

        # Input conditioning applied before the projection MLP.
        self.feature_processor = nn.Sequential(
            nn.BatchNorm1d(input_dim),
            nn.Dropout(p=0.2),
        )

        projection_layers: list[nn.Module] = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, output_dim),
        ]
        if use_batchnorm:
            projection_layers.append(nn.BatchNorm1d(output_dim))
        self.projection = nn.Sequential(*projection_layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Project features and normalize each row to unit L2 norm."""
        projected = self.projection(self.feature_processor(x))
        return F.normalize(projected, p=2, dim=1)
148
+
149
+
150
class CoralReIDModel(nn.Module):
    """Coral re-identification model: timm backbone + MLP projection head."""

    def __init__(self, config: ModelConfig) -> None:
        super().__init__()

        # Backbone: timm model with its classification head stripped
        # (num_classes=0 makes timm return pooled features).
        self.backbone = timm.create_model(
            config.backbone_variant,
            pretrained=False,  # weights are restored from the checkpoint
            num_classes=0,
            img_size=config.img_size,
        )

        self.head = MLPHead(
            input_dim=config.backbone_output_dim,
            hidden_dim=config.hidden_dim,
            output_dim=config.output_dim,
            dropout=config.dropout,
            use_batchnorm=config.use_batchnorm,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a batch of images to L2-normalized embedding vectors."""
        return self.head(self.backbone(x))
175
+
176
+
177
def load_model(
    checkpoint_path: str | Path,
    device: str | torch.device = "cpu",
    config_path: str | Path | None = None,
) -> tuple[CoralReIDModel, ModelConfig]:
    """Load model from checkpoint file.

    The model config is taken from the checkpoint's ``model_config`` entry
    when present; otherwise ``config_path`` (a YAML file) is used as a
    fallback.

    Args:
        checkpoint_path: Path to the .pt checkpoint file.
        device: Device to load the model on.
        config_path: Optional path to a YAML config (fallback).

    Returns:
        Tuple of (model, config).

    Raises:
        ValueError: If neither the checkpoint nor ``config_path`` provides
            a model config.
    """
    # NOTE(review): weights_only=False unpickles arbitrary objects — only
    # load checkpoints from trusted sources.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

    ckpt_is_dict = isinstance(checkpoint, dict)

    # Resolve config: checkpoint-embedded takes precedence over the YAML fallback.
    if ckpt_is_dict and "model_config" in checkpoint:
        config = ModelConfig.from_dict(checkpoint["model_config"])
    elif config_path is not None:
        config = ModelConfig.from_yaml(config_path)
    else:
        raise ValueError(
            "Checkpoint does not contain model_config and no --config provided. "
            "Use embed_config.py to add config to the checkpoint, or pass --config."
        )

    model = CoralReIDModel(config)

    # The checkpoint is either {"model_state_dict": ...} or a raw state dict.
    state_dict = (
        checkpoint["model_state_dict"]
        if ckpt_is_dict and "model_state_dict" in checkpoint
        else checkpoint
    )

    # Key remapping: the training code wrapped timm as "backbone.model.*",
    # while this standalone model uses the timm module directly ("backbone.*").
    prefix = "backbone.model."
    mapped_state_dict: dict[str, torch.Tensor] = {
        ("backbone." + key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }

    model.load_state_dict(mapped_state_dict)
    model.to(device)
    model.eval()

    logger.info(
        f"Loaded model: {config.backbone_variant} "
        f"({config.img_size}px, {config.output_dim}d embedding)"
    )
    return model, config
236
+
237
+
238
+ # ---------------------------------------------------------------------------
239
+ # Inference Transforms
240
+ # ---------------------------------------------------------------------------
241
+
242
+
243
def get_inference_transforms(image_size: int) -> transforms.Compose:
    """Create inference transforms matching the training pipeline.

    Resize (bicubic, square) -> ToTensor -> ImageNet mean/std normalization.
    """
    resize = transforms.Resize(
        (image_size, image_size),
        interpolation=transforms.InterpolationMode.BICUBIC,
    )
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    return transforms.Compose([resize, transforms.ToTensor(), normalize])
256
+
257
+
258
+ # ---------------------------------------------------------------------------
259
+ # Feature Extraction
260
+ # ---------------------------------------------------------------------------
261
+
262
+
263
@torch.no_grad()
def extract_single(
    model: CoralReIDModel,
    img_path: str | Path,
    transform: transforms.Compose,
    device: str | torch.device,
) -> np.ndarray | None:
    """Extract a flat feature embedding from a single image.

    Returns None (after logging a warning) when the image cannot be
    read, transformed, or embedded.
    """
    try:
        image = Image.open(img_path).convert("RGB")
        batch = transform(image).unsqueeze(0).to(device)
        return model(batch).cpu().numpy().flatten()
    except Exception as e:
        logger.warning(f"Failed to process {img_path}: {e}")
        return None
279
+
280
+
281
@torch.no_grad()
def extract_directory(
    model: CoralReIDModel,
    directory: str | Path,
    transform: transforms.Compose,
    device: str | torch.device,
    batch_size: int = 32,
) -> tuple[np.ndarray, list[str]]:
    """Extract features from all images in a directory.

    Unreadable images are skipped with a warning; processing continues.

    Returns:
        Tuple of (features array [N, D], list of coral names).
    """
    directory = Path(directory)
    valid_ext = (".jpg", ".jpeg", ".png")
    image_files = sorted(
        f for f in os.listdir(directory) if f.lower().endswith(valid_ext)
    )

    if not image_files:
        logger.warning(f"No images found in {directory}")
        return np.array([]), []

    all_features: list[np.ndarray] = []
    coral_names: list[str] = []

    for start in tqdm(range(0, len(image_files), batch_size), desc=str(directory)):
        tensors: list[torch.Tensor] = []
        names: list[str] = []

        for fname in image_files[start : start + batch_size]:
            try:
                image = Image.open(directory / fname).convert("RGB")
                tensors.append(transform(image))
                # Coral name = filename without extension.
                names.append(os.path.splitext(fname)[0])
            except Exception as e:
                logger.warning(f"Skipping {fname}: {e}")

        if not tensors:
            continue
        batch = torch.stack(tensors).to(device)
        all_features.append(model(batch).cpu().numpy())
        coral_names.extend(names)

    features = np.concatenate(all_features, axis=0) if all_features else np.array([])
    return features, coral_names
333
+
334
+
335
def save_features_h5(
    path: str | Path,
    features: np.ndarray,
    coral_names: list[str],
    metadata: dict[str, str | int | float] | None = None,
) -> None:
    """Save features and coral names to an HDF5 file.

    Parent directories are created as needed. Non-None ``metadata``
    entries are stored as file-level HDF5 attributes.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    encoded_names = [name.encode("utf-8") for name in coral_names]

    with h5py.File(path, "w") as h5:
        h5.create_dataset("features", data=features)
        h5.create_dataset("coral_names", data=encoded_names)
        # An empty result is a 1-D array; record feature_dim=0 in that case.
        h5.attrs["feature_dim"] = features.shape[1] if len(features.shape) > 1 else 0
        h5.attrs["num_samples"] = features.shape[0]

        for key, value in (metadata or {}).items():
            if value is not None:
                h5.attrs[key] = value

    logger.info(f"Saved {len(coral_names)} features to {path}")
360
+
361
+
362
+ # ---------------------------------------------------------------------------
363
+ # CLI
364
+ # ---------------------------------------------------------------------------
365
+
366
+
367
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments for the feature-extraction CLI."""
    parser = argparse.ArgumentParser(
        description="Standalone feature extraction for coral re-identification models",
    )
    parser.add_argument("--model", required=True, help="Path to model checkpoint (.pt)")
    parser.add_argument(
        "--config",
        default=None,
        help="Path to YAML config file (optional if config is embedded in checkpoint)",
    )
    parser.add_argument("--input", required=True, help="Path to image file or directory")
    parser.add_argument(
        "--output",
        default=None,
        help="Output path (.h5 file or directory for area mode)",
    )
    parser.add_argument(
        "--areas",
        nargs="+",
        default=None,
        help="Area IDs for N-Benchmark extraction (e.g., 37 38 39 40)",
    )
    parser.add_argument(
        "--year",
        default=None,
        help="Year label for area mode filenames (e.g., 2022)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=32,
        help="Batch size for extraction (default: 32)",
    )
    parser.add_argument(
        "--device",
        default="cuda" if torch.cuda.is_available() else "cpu",
        help="Device (default: cuda if available)",
    )
    return parser.parse_args()
414
+
415
+
416
def main() -> None:
    """CLI entry point: dispatch to single-image, area, or directory mode."""
    args = parse_args()
    input_path = Path(args.input)

    # Load model and build the matching preprocessing pipeline.
    model, config = load_model(args.model, args.device, config_path=args.config)
    transform = get_inference_transforms(config.img_size)

    # --- Single image mode ---
    if input_path.is_file():
        embedding = extract_single(model, input_path, transform, args.device)
        if embedding is not None:
            print(f"Image: {input_path.name}")
            print(f"Embedding shape: {embedding.shape}")
            print(f"Embedding norm: {np.linalg.norm(embedding):.4f}")
            if args.output:
                np.save(args.output, embedding)
                logger.info(f"Saved embedding to {args.output}")
            else:
                print(f"Embedding: {embedding[:8]}... (first 8 dims)")
        return

    # --- Area mode (N-Benchmark style): one HDF5 per area subdirectory ---
    if args.areas:
        output_dir = Path(args.output) if args.output else Path("features")
        output_dir.mkdir(parents=True, exist_ok=True)

        for area_id in args.areas:
            area_dir = input_path / area_id
            if not area_dir.exists():
                logger.warning(f"Area directory not found: {area_dir}")
                continue

            features, names = extract_directory(
                model, area_dir, transform, args.device, args.batch_size,
            )
            if len(features) == 0:
                continue
            if args.year:
                out_path = output_dir / f"features_{args.year}_{area_id}_whole.h5"
            else:
                out_path = output_dir / f"features_{area_id}_whole.h5"
            save_features_h5(
                out_path,
                features,
                names,
                {"area_id": area_id, "source_dir": str(area_dir)},
            )
        return

    # --- Directory mode: all images in one HDF5 ---
    if input_path.is_dir():
        features, names = extract_directory(
            model, input_path, transform, args.device, args.batch_size,
        )
        if len(features) > 0:
            save_features_h5(
                args.output or "features.h5",
                features,
                names,
                {"source_dir": str(input_path)},
            )
        else:
            logger.error("No features extracted")
        return

    logger.error(f"Input path not found: {input_path}")
483
+
484
+
485
# Run the CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
legacy/dinov2_coral_best_model_20251015_165008.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb998eb8a37c7a539a36c6b32485beb9ff3d8c1fca5d5aacfa8cb9aefbfd47b
3
+ size 354828824
legacy/dinov2_coral_best_model_20251016_133229.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f8af48b28b22c23591f68a96144ce1fc4c3c597a5248476d59e5e0a2abaf26
3
+ size 354828824
pyproject.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "coral-models"
3
+ version = "0.1.0"
4
+ description = "Standalone inference for coral re-identification models"
5
+ requires-python = ">=3.10"
6
+ license = "GPL-3.0-or-later"
7
+ dependencies = [
8
+ "torch>=2.0.0",
9
+ "torchvision>=0.15.0",
10
+ "timm>=1.0.0",
11
+ "h5py>=3.9.0",
12
+ "numpy>=1.24.0",
13
+ "pillow>=10.0.0",
14
+ "pyyaml>=6.0",
15
+ "tqdm>=4.65.0",
16
+ ]
17
+
18
+ [tool.uv]
19
+ package = false
uv.lock ADDED
The diff for this file is too large to render. See raw diff