Upload folder using huggingface_hub
Browse files- .gitattributes +0 -34
- .gitignore +28 -0
- README(zh-tw).md +176 -0
- README.md +176 -3
- dinov3_vitsplus_efficient/README(zh-tw).md +111 -0
- dinov3_vitsplus_efficient/README.md +111 -0
- dinov3_vitsplus_efficient/best_model_20260306_233824.pt +3 -0
- dinov3_vitsplus_efficient/dinov3_vitsplus_tune_02_p2lr5_4ph.yaml +75 -0
- dinov3_vitsplus_efficient/final_model_20260306_233824.pt +3 -0
- e3_01b_dinov2_vitb_best/README(zh-tw).md +104 -0
- e3_01b_dinov2_vitb_best/README.md +104 -0
- e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt +3 -0
- e3_01b_dinov2_vitb_best/e3_01b_same_area_neg_075.yaml +74 -0
- e3_01b_dinov2_vitb_best/final_model_20260308_110634.pt +3 -0
- extract_features.py +486 -0
- legacy/dinov2_coral_best_model_20251015_165008.pt +3 -0
- legacy/dinov2_coral_best_model_20251016_133229.pt +3 -0
- pyproject.toml +19 -0
- uv.lock +0 -0
.gitattributes
CHANGED
|
@@ -1,35 +1 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.egg-info/
|
| 5 |
+
|
| 6 |
+
# Virtual environments
|
| 7 |
+
.venv/
|
| 8 |
+
|
| 9 |
+
# Benchmark data and scripts (not for HF)
|
| 10 |
+
2022sample/
|
| 11 |
+
2023sample/
|
| 12 |
+
benchmark.py
|
| 13 |
+
embed_config.py
|
| 14 |
+
results_*.json
|
| 15 |
+
|
| 16 |
+
# Extracted features
|
| 17 |
+
features/
|
| 18 |
+
*.h5
|
| 19 |
+
|
| 20 |
+
# IDE
|
| 21 |
+
.idea/
|
| 22 |
+
.vscode/
|
| 23 |
+
*.swp
|
| 24 |
+
*.swo
|
| 25 |
+
|
| 26 |
+
# OS
|
| 27 |
+
.DS_Store
|
| 28 |
+
Thumbs.db
|
README(zh-tw).md
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: gpl-3.0
|
| 3 |
+
tags:
|
| 4 |
+
- coral-reef
|
| 5 |
+
- re-identification
|
| 6 |
+
- metric-learning
|
| 7 |
+
- dinov2
|
| 8 |
+
- dinov3
|
| 9 |
+
- pytorch
|
| 10 |
+
datasets:
|
| 11 |
+
- custom
|
| 12 |
+
pipeline_tag: image-feature-extraction
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
# 珊瑚個體辨識模型
|
| 16 |
+
|
| 17 |
+
針對水下珊瑚個體跨年辨識的微調模型。
|
| 18 |
+
|
| 19 |
+
本專案包含兩個最佳模型與獨立推論腳本,無需依賴訓練程式碼(`coral_reid`)即可運作。
|
| 20 |
+
|
| 21 |
+
> 原始碼:[GitHub](https://github.com/YuC13600/coral_models)
|
| 22 |
+
|
| 23 |
+
## 最佳模型
|
| 24 |
+
|
| 25 |
+
### 最高精度 — E3-01b DINOv2 ViT-B/14
|
| 26 |
+
|
| 27 |
+
| | |
|
| 28 |
+
|---|---|
|
| 29 |
+
| **N-Benchmark Top-1** | **86.6%** (110/127) |
|
| 30 |
+
| Top-3 / Top-5 / Top-10 | 96.9% / 97.6% / 100.0% |
|
| 31 |
+
| 平均排名 / 最差排名 | 1.30 / 9 |
|
| 32 |
+
| Backbone | DINOv2 ViT-B/14 (86.6M 參數, timm 518×518) |
|
| 33 |
+
| 損失函數 | Triplet (margin=0.3) + Hard Mining |
|
| 34 |
+
| 取樣器 | AreaAwareSampler (area_ratio=0.75) |
|
| 35 |
+
| 訓練 | 4 階段漸進式解凍,56 epochs,約 7.2 小時 |
|
| 36 |
+
| 嵌入維度 | 1280-d,L2 正規化 |
|
| 37 |
+
| 檔案 | `e3_01b_dinov2_vitb_best/` |
|
| 38 |
+
|
| 39 |
+
### 最高效率 — DINOv3 ViT-S+/16
|
| 40 |
+
|
| 41 |
+
| | |
|
| 42 |
+
|---|---|
|
| 43 |
+
| **N-Benchmark Top-1** | **81.1%** (103/127) |
|
| 44 |
+
| Top-3 / Top-5 / Top-10 | 92.1% / 95.3% / 99.2% |
|
| 45 |
+
| 平均排名 | 1.61 |
|
| 46 |
+
| Backbone | DINOv3 ViT-S+/16 (~22M 參數, timm 512×512) |
|
| 47 |
+
| 損失函數 | Triplet (margin=0.3) + Hard Mining |
|
| 48 |
+
| 取樣器 | MPerClassSampler (m=2) |
|
| 49 |
+
| 訓練 | 4 階段漸進式解凍,63 epochs,約 2.0 小時 |
|
| 50 |
+
| 嵌入維度 | 768-d,L2 正規化 |
|
| 51 |
+
| 檔案 | `dinov3_vitsplus_efficient/` |
|
| 52 |
+
|
| 53 |
+
### 模型比較
|
| 54 |
+
|
| 55 |
+
| 指標 | 最高精度 | 最高效率 | 差距 |
|
| 56 |
+
|------|---------|---------|------|
|
| 57 |
+
| Top-1 | 86.6% | 81.1% | -5.5% |
|
| 58 |
+
| 參數量 | ~86.6M | ~22M | **-75%** |
|
| 59 |
+
| 模型大小 | 339 MB | 112 MB | **-67%** |
|
| 60 |
+
| 訓練時間 | ~7.2h | ~2.0h | **-72%** |
|
| 61 |
+
| 推論 tokens | 1369 (patch14) | 1024 (patch16) | -25% |
|
| 62 |
+
|
| 63 |
+
## 快速開始
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
# 安裝依賴(獨立環境,不需要 coral_reid)
|
| 67 |
+
uv sync
|
| 68 |
+
|
| 69 |
+
# 提取單張圖片特徵
|
| 70 |
+
uv run python extract_features.py \
|
| 71 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 72 |
+
--input /path/to/image.jpg
|
| 73 |
+
|
| 74 |
+
# 提取整個目錄的特徵
|
| 75 |
+
uv run python extract_features.py \
|
| 76 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 77 |
+
--input /path/to/images/ \
|
| 78 |
+
--output features.h5
|
| 79 |
+
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
## N-Benchmark 各區域結果
|
| 83 |
+
|
| 84 |
+
### E3-01b DINOv2 ViT-B/14(最佳)
|
| 85 |
+
|
| 86 |
+
| 區域 | 查詢數 | Top-1 | Top-3 | Top-5 | 平均排名 |
|
| 87 |
+
|------|--------|-------|-------|-------|----------|
|
| 88 |
+
| 37 | 32 | 93.8% | 96.9% | 96.9% | 1.28 |
|
| 89 |
+
| 38 | 31 | 80.6% | 100.0% | 100.0% | 1.19 |
|
| 90 |
+
| 39 | 27 | 85.2% | 92.6% | 96.3% | 1.44 |
|
| 91 |
+
| 40 | 37 | 86.5% | 97.3% | 97.3% | 1.30 |
|
| 92 |
+
| **整體** | **127** | **86.6%** | **96.9%** | **97.6%** | **1.30** |
|
| 93 |
+
|
| 94 |
+
### DINOv3 ViT-S+/16(高效率)
|
| 95 |
+
|
| 96 |
+
| 區域 | 查詢數 | Top-1 | Top-3 | Top-5 | 平均排名 |
|
| 97 |
+
|------|--------|-------|-------|-------|----------|
|
| 98 |
+
| 37 | 32 | 81.2% | 93.8% | 96.9% | 1.56 |
|
| 99 |
+
| 38 | 31 | 77.4% | 90.3% | 93.5% | 1.90 |
|
| 100 |
+
| 39 | 27 | 85.2% | 92.6% | 96.3% | 1.37 |
|
| 101 |
+
| 40 | 37 | 81.1% | 91.9% | 94.6% | 1.57 |
|
| 102 |
+
| **整體** | **127** | **81.1%** | **92.1%** | **95.3%** | **1.61** |
|
| 103 |
+
|
| 104 |
+
## 完整模型歷史
|
| 105 |
+
|
| 106 |
+
### 模型比較表
|
| 107 |
+
|
| 108 |
+
| 模型名稱 | 架構 | Backbone | 損失函數 | 挖掘方式 | 同區域負樣本 | 圖片 | 測試準確率 | 測試損失 | 驗證損失 | N-Bench 平均 | A37 | A38 | A39 | A40 | 時間 |
|
| 109 |
+
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
| 110 |
+
| 預訓練 | - | DINOv2-B/14 | - | - | - | bbox | - | - | - | 29.48% | 28.12% | 35.48% | 29.63% | 24.32% | - |
|
| 111 |
+
| 20250812_152526 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ❌ | bbox | 92.6% | 0.1659 | - | 48.25% | 50.00% | 51.61% | 48.15% | 43.24% | ~16h |
|
| 112 |
+
| 20251007_133126 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ✅ | bbox | 88.8% | 0.2523 | - | 39.32% | 46.88% | 41.94% | 33.33% | 35.14% | ~16h |
|
| 113 |
+
| 20251008_094017 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ✅ | bbox | 90.4% | 0.1636 | - | 40.19% | 37.50% | 48.39% | 37.04% | 37.84% | ~16h |
|
| 114 |
+
| 20251014_183603 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ❌ | bbox | 92.8% | 0.1012 | - | 40.97% | 37.50% | 38.71% | 44.44% | 43.24% | ~16h |
|
| 115 |
+
| 預訓練 | - | DINOv2-B/14 | - | - | - | whole | - | - | - | 50.88% | 34.38% | 54.84% | 62.96% | 51.35% | - |
|
| 116 |
+
| 20251015_165008 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ✅ | whole | 92.7% | 0.1330 | 0.1006 | 64.43% | 62.50% | 61.29% | 55.56% | 78.38% | ~16h |
|
| 117 |
+
| 20251016_133229 | 舊 | DINOv2-B/14 | Triplet | 預組三元組 | ❌ | whole | 97.9% | 0.0429 | - | 63.31% | 56.25% | 58.06% | 74.07% | 64.86% | ~16h |
|
| 118 |
+
| **20260308_110634** | **新** | **DINOv2-B/14 (timm 518)** | **Triplet** | **動態 (PML)** | **AreaAware 0.75** | whole | - | - | **0.1604** | **86.6%** | **93.8%** | **80.6%** | **85.2%** | **86.5%** | **~7.2h** |
|
| 119 |
+
| **20260306_233824** | **新** | **DINOv3-S+/16 (timm 512)** | **Triplet** | **動態 (PML)** | ❌ | whole | - | - | **0.1604** | **81.1%** | **81.2%** | **77.4%** | **85.2%** | **81.1%** | **~2.0h** |
|
| 120 |
+
|
| 121 |
+
### 欄位說明
|
| 122 |
+
|
| 123 |
+
| 欄位 | 說明 |
|
| 124 |
+
| --- | --- |
|
| 125 |
+
| 架構 | `舊` = 舊專案實作,`新` = 重構後的模組化架構 |
|
| 126 |
+
| Backbone | 特徵提取器(DINOv2-B/14、DINOv3-S+/16 等) |
|
| 127 |
+
| 損失函數 | 損失函數類型(Triplet、ArcFace、CosFace、Circle、Contrastive 等) |
|
| 128 |
+
| 挖掘方式 | 樣本挖掘策略:`預組三元組` = 固定三元組,`動態 (PML)` = MPerClassSampler |
|
| 129 |
+
| 同區域負樣本 | 是否限制負樣本來自同一地理區域(`AreaAware 0.75` = 75% 同區域) |
|
| 130 |
+
| 圖片 | `bbox` = EXIF 邊界框裁切,`whole` = 完整圖片 |
|
| 131 |
+
| 測試準確率 | 測試集準確率(僅舊架構,衡量 pos_dist < neg_dist) |
|
| 132 |
+
| 驗證損失 | 訓練期間最佳驗證損失 |
|
| 133 |
+
| N-Bench 平均 | N-Benchmark Top-1 準確率(區域 37-40 平均) |
|
| 134 |
+
|
| 135 |
+
### 架構差異
|
| 136 |
+
|
| 137 |
+
| 特性 | 舊架構 | 新架構 |
|
| 138 |
+
| --- | --- | --- |
|
| 139 |
+
| Dataset 輸出 | `(anchor, pos, neg)` - 3 張圖片 | `(image, label)` - 1 張圖片 |
|
| 140 |
+
| 三元組形成 | 訓練前預先組成 | 每批次動態挖掘 |
|
| 141 |
+
| 批次取樣器 | 隨機 | MPerClassSampler (m=2) |
|
| 142 |
+
| 損失函數 | 自訂 TripletLossWithMining | PML TripletMarginLoss |
|
| 143 |
+
| 每 Epoch 樣本數 | ~50,000 三元組 × 3 張圖片 | ~4,000 張圖片 |
|
| 144 |
+
| 訓練速度 | ~23 分鐘/epoch | ~1.5 分鐘/epoch |
|
| 145 |
+
| 同區域負樣本 | 已實作 | 已實作(AreaAwareSampler) |
|
| 146 |
+
|
| 147 |
+
> **N-Benchmark(最近鄰基準測試)**:在區域 37-40 中,跨 2022 與 2023 年比對珊瑚標本時,正確辨識的 Top-1 準確率。
|
| 148 |
+
|
| 149 |
+
## 專案結構
|
| 150 |
+
|
| 151 |
+
```
|
| 152 |
+
coral_models/
|
| 153 |
+
├── pyproject.toml # uv 環境(獨立)
|
| 154 |
+
├── extract_features.py # 特徵提取腳本
|
| 155 |
+
├── e3_01b_dinov2_vitb_best/ # 最高精度模型 (86.6%)
|
| 156 |
+
│ ├── best_model_20260308_110634.pt
|
| 157 |
+
│ ├── final_model_20260308_110634.pt
|
| 158 |
+
│ ├── e3_01b_same_area_neg_075.yaml
|
| 159 |
+
│ ├── README.md
|
| 160 |
+
│ └── README(zh-tw).md
|
| 161 |
+
├── dinov3_vitsplus_efficient/ # 最高效率模型 (81.1%)
|
| 162 |
+
│ ├── best_model_20260306_233824.pt
|
| 163 |
+
│ ├── final_model_20260306_233824.pt
|
| 164 |
+
│ ├── dinov3_vitsplus_tune_02_p2lr5_4ph.yaml
|
| 165 |
+
│ ├── README.md
|
| 166 |
+
│ └── README(zh-tw).md
|
| 167 |
+
└── legacy/ # 舊架構模型 (torch.hub, 224×224)
|
| 168 |
+
├── dinov2_coral_best_model_20251015_165008.pt # 64.43%(舊最佳,同區域負樣本,完整圖片)
|
| 169 |
+
└── dinov2_coral_best_model_20251016_133229.pt # 63.31%(無同區域負樣本,完整圖片)
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
## 授權條款
|
| 173 |
+
|
| 174 |
+
本專案採用 GPL-3.0 授權。
|
| 175 |
+
|
| 176 |
+
基於 Meta Platforms, Inc. 的 DINOv2 與 DINOv3(Apache License 2.0)。
|
README.md
CHANGED
|
@@ -1,3 +1,176 @@
|
|
| 1 |
-
---
|
| 2 |
-
license: gpl-3.0
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: gpl-3.0
|
| 3 |
+
tags:
|
| 4 |
+
- coral-reef
|
| 5 |
+
- re-identification
|
| 6 |
+
- metric-learning
|
| 7 |
+
- dinov2
|
| 8 |
+
- dinov3
|
| 9 |
+
- pytorch
|
| 10 |
+
datasets:
|
| 11 |
+
- custom
|
| 12 |
+
pipeline_tag: image-feature-extraction
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
# Coral Re-Identification Models
|
| 16 |
+
|
| 17 |
+
Fine-tuned models for underwater coral individual re-identification across multiple years.
|
| 18 |
+
|
| 19 |
+
This repository contains the two best models and standalone inference scripts. No dependency on the training codebase (`coral_reid`) is required.
|
| 20 |
+
|
| 21 |
+
> Source code: [GitHub](https://github.com/YuC13600/coral_models)
|
| 22 |
+
|
| 23 |
+
## Best Models
|
| 24 |
+
|
| 25 |
+
### Best Accuracy — E3-01b DINOv2 ViT-B/14
|
| 26 |
+
|
| 27 |
+
| | |
|
| 28 |
+
|---|---|
|
| 29 |
+
| **N-Benchmark Top-1** | **86.6%** (110/127) |
|
| 30 |
+
| Top-3 / Top-5 / Top-10 | 96.9% / 97.6% / 100.0% |
|
| 31 |
+
| Avg Rank / Worst Rank | 1.30 / 9 |
|
| 32 |
+
| Backbone | DINOv2 ViT-B/14 (86.6M params, timm 518×518) |
|
| 33 |
+
| Loss | Triplet (margin=0.3) + Hard Mining |
|
| 34 |
+
| Sampler | AreaAwareSampler (area_ratio=0.75) |
|
| 35 |
+
| Training | 4-phase progressive unfreezing, 56 epochs, ~7.2h |
|
| 36 |
+
| Embedding | 1280-d, L2-normalized |
|
| 37 |
+
| Files | `e3_01b_dinov2_vitb_best/` |
|
| 38 |
+
|
| 39 |
+
### Most Efficient — DINOv3 ViT-S+/16
|
| 40 |
+
|
| 41 |
+
| | |
|
| 42 |
+
|---|---|
|
| 43 |
+
| **N-Benchmark Top-1** | **81.1%** (103/127) |
|
| 44 |
+
| Top-3 / Top-5 / Top-10 | 92.1% / 95.3% / 99.2% |
|
| 45 |
+
| Avg Rank | 1.61 |
|
| 46 |
+
| Backbone | DINOv3 ViT-S+/16 (~22M params, timm 512×512) |
|
| 47 |
+
| Loss | Triplet (margin=0.3) + Hard Mining |
|
| 48 |
+
| Sampler | MPerClassSampler (m=2) |
|
| 49 |
+
| Training | 4-phase progressive unfreezing, 63 epochs, ~2.0h |
|
| 50 |
+
| Embedding | 768-d, L2-normalized |
|
| 51 |
+
| Files | `dinov3_vitsplus_efficient/` |
|
| 52 |
+
|
| 53 |
+
### Comparison
|
| 54 |
+
|
| 55 |
+
| Metric | Best Accuracy | Most Efficient | Difference |
|
| 56 |
+
|--------|--------------|----------------|------------|
|
| 57 |
+
| Top-1 | 86.6% | 81.1% | -5.5% |
|
| 58 |
+
| Parameters | ~86.6M | ~22M | **-75%** |
|
| 59 |
+
| Model size | 339 MB | 112 MB | **-67%** |
|
| 60 |
+
| Training time | ~7.2h | ~2.0h | **-72%** |
|
| 61 |
+
| Inference tokens | 1369 (patch14) | 1024 (patch16) | -25% |
|
| 62 |
+
|
| 63 |
+
## Quick Start
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
# Install dependencies (standalone, no coral_reid needed)
|
| 67 |
+
uv sync
|
| 68 |
+
|
| 69 |
+
# Extract features from a single image
|
| 70 |
+
uv run python extract_features.py \
|
| 71 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 72 |
+
--input /path/to/image.jpg
|
| 73 |
+
|
| 74 |
+
# Extract features from a directory
|
| 75 |
+
uv run python extract_features.py \
|
| 76 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 77 |
+
--input /path/to/images/ \
|
| 78 |
+
--output features.h5
|
| 79 |
+
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
## N-Benchmark Per-Area Results
|
| 83 |
+
|
| 84 |
+
### E3-01b DINOv2 ViT-B/14 (Best)
|
| 85 |
+
|
| 86 |
+
| Area | Queries | Top-1 | Top-3 | Top-5 | Avg Rank |
|
| 87 |
+
|------|---------|-------|-------|-------|----------|
|
| 88 |
+
| 37 | 32 | 93.8% | 96.9% | 96.9% | 1.28 |
|
| 89 |
+
| 38 | 31 | 80.6% | 100.0% | 100.0% | 1.19 |
|
| 90 |
+
| 39 | 27 | 85.2% | 92.6% | 96.3% | 1.44 |
|
| 91 |
+
| 40 | 37 | 86.5% | 97.3% | 97.3% | 1.30 |
|
| 92 |
+
| **Overall** | **127** | **86.6%** | **96.9%** | **97.6%** | **1.30** |
|
| 93 |
+
|
| 94 |
+
### DINOv3 ViT-S+/16 (Efficient)
|
| 95 |
+
|
| 96 |
+
| Area | Queries | Top-1 | Top-3 | Top-5 | Avg Rank |
|
| 97 |
+
|------|---------|-------|-------|-------|----------|
|
| 98 |
+
| 37 | 32 | 81.2% | 93.8% | 96.9% | 1.56 |
|
| 99 |
+
| 38 | 31 | 77.4% | 90.3% | 93.5% | 1.90 |
|
| 100 |
+
| 39 | 27 | 85.2% | 92.6% | 96.3% | 1.37 |
|
| 101 |
+
| 40 | 37 | 81.1% | 91.9% | 94.6% | 1.57 |
|
| 102 |
+
| **Overall** | **127** | **81.1%** | **92.1%** | **95.3%** | **1.61** |
|
| 103 |
+
|
| 104 |
+
## Full Model History
|
| 105 |
+
|
| 106 |
+
### Model Comparison Table
|
| 107 |
+
|
| 108 |
+
| Model Name | Arch | Backbone | Loss | Mining | Same Area Neg | Image | Test Acc | Test Loss | Val Loss | N-Bench Avg | A37 | A38 | A39 | A40 | Time |
|
| 109 |
+
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
| 110 |
+
| Pre-trained | - | DINOv2-B/14 | - | - | - | bbox | - | - | - | 29.48% | 28.12% | 35.48% | 29.63% | 24.32% | - |
|
| 111 |
+
| 20250812_152526 | old | DINOv2-B/14 | Triplet | pre-composed | ❌ | bbox | 92.6% | 0.1659 | - | 48.25% | 50.00% | 51.61% | 48.15% | 43.24% | ~16h |
|
| 112 |
+
| 20251007_133126 | old | DINOv2-B/14 | Triplet | pre-composed | ✅ | bbox | 88.8% | 0.2523 | - | 39.32% | 46.88% | 41.94% | 33.33% | 35.14% | ~16h |
|
| 113 |
+
| 20251008_094017 | old | DINOv2-B/14 | Triplet | pre-composed | ✅ | bbox | 90.4% | 0.1636 | - | 40.19% | 37.50% | 48.39% | 37.04% | 37.84% | ~16h |
|
| 114 |
+
| 20251014_183603 | old | DINOv2-B/14 | Triplet | pre-composed | ❌ | bbox | 92.8% | 0.1012 | - | 40.97% | 37.50% | 38.71% | 44.44% | 43.24% | ~16h |
|
| 115 |
+
| Pre-trained | - | DINOv2-B/14 | - | - | - | whole | - | - | - | 50.88% | 34.38% | 54.84% | 62.96% | 51.35% | - |
|
| 116 |
+
| 20251015_165008 | old | DINOv2-B/14 | Triplet | pre-composed | ✅ | whole | 92.7% | 0.1330 | 0.1006 | 64.43% | 62.50% | 61.29% | 55.56% | 78.38% | ~16h |
|
| 117 |
+
| 20251016_133229 | old | DINOv2-B/14 | Triplet | pre-composed | ❌ | whole | 97.9% | 0.0429 | - | 63.31% | 56.25% | 58.06% | 74.07% | 64.86% | ~16h |
|
| 118 |
+
| **20260308_110634** | **new** | **DINOv2-B/14 (timm 518)** | **Triplet** | **dynamic (PML)** | **AreaAware 0.75** | whole | - | - | **0.1604** | **86.6%** | **93.8%** | **80.6%** | **85.2%** | **86.5%** | **~7.2h** |
|
| 119 |
+
| **20260306_233824** | **new** | **DINOv3-S+/16 (timm 512)** | **Triplet** | **dynamic (PML)** | ❌ | whole | - | - | **0.1604** | **81.1%** | **81.2%** | **77.4%** | **85.2%** | **81.1%** | **~2.0h** |
|
| 120 |
+
|
| 121 |
+
### Column Descriptions
|
| 122 |
+
|
| 123 |
+
| Column | Description |
|
| 124 |
+
| --- | --- |
|
| 125 |
+
| Arch | `old` = old_repo implementation, `new` = refactored modular architecture |
|
| 126 |
+
| Backbone | Feature extractor (DINOv2-B/14, DINOv3-S+/16, etc.) |
|
| 127 |
+
| Loss | Loss function (Triplet, ArcFace, CosFace, Circle, Contrastive, etc.) |
|
| 128 |
+
| Mining | Sample mining: `pre-composed` = fixed triplets, `dynamic (PML)` = MPerClassSampler |
|
| 129 |
+
| Same Area Neg | Whether negatives restricted to same geographic area (`AreaAware 0.75` = 75% same area) |
|
| 130 |
+
| Image | `bbox` = EXIF bounding box crop, `whole` = full image |
|
| 131 |
+
| Test Acc | Test set accuracy (old arch only, measures pos_dist < neg_dist) |
|
| 132 |
+
| Val Loss | Best validation loss during training |
|
| 133 |
+
| N-Bench Avg | N-Benchmark Top-1 accuracy averaged across areas 37-40 |
|
| 134 |
+
|
| 135 |
+
### Architecture Differences
|
| 136 |
+
|
| 137 |
+
| Feature | Old Architecture | New Architecture |
|
| 138 |
+
| --- | --- | --- |
|
| 139 |
+
| Dataset Output | `(anchor, pos, neg)` - 3 images | `(image, label)` - 1 image |
|
| 140 |
+
| Triplet Formation | Pre-composed before training | Dynamic mining per batch |
|
| 141 |
+
| Batch Sampler | Random | MPerClassSampler (m=2) |
|
| 142 |
+
| Loss Function | Custom TripletLossWithMining | PML TripletMarginLoss |
|
| 143 |
+
| Samples per Epoch | ~50,000 triplets x 3 images | ~4,000 images |
|
| 144 |
+
| Training Speed | ~23 min/epoch | ~1.5 min/epoch |
|
| 145 |
+
| Same Area Negatives | Implemented | Implemented (AreaAwareSampler) |
|
| 146 |
+
|
| 147 |
+
> **N-Benchmark (Nearest Benchmark)**: Top-1 accuracy rate of identifying the correct coral when comparing specimens in areas 37-40 across 2022 and 2023.
|
| 148 |
+
|
| 149 |
+
## Project Structure
|
| 150 |
+
|
| 151 |
+
```
|
| 152 |
+
coral_models/
|
| 153 |
+
├── pyproject.toml # uv environment (standalone)
|
| 154 |
+
├── extract_features.py # Feature extraction script
|
| 155 |
+
├── e3_01b_dinov2_vitb_best/ # Best accuracy model (86.6%)
|
| 156 |
+
│ ├── best_model_20260308_110634.pt
|
| 157 |
+
│ ├── final_model_20260308_110634.pt
|
| 158 |
+
│ ├── e3_01b_same_area_neg_075.yaml
|
| 159 |
+
│ ├── README.md
|
| 160 |
+
│ └── README(zh-tw).md
|
| 161 |
+
├── dinov3_vitsplus_efficient/ # Most efficient model (81.1%)
|
| 162 |
+
│ ├── best_model_20260306_233824.pt
|
| 163 |
+
│ ├── final_model_20260306_233824.pt
|
| 164 |
+
│ ├── dinov3_vitsplus_tune_02_p2lr5_4ph.yaml
|
| 165 |
+
│ ├── README.md
|
| 166 |
+
│ └── README(zh-tw).md
|
| 167 |
+
└── legacy/ # Old architecture models (torch.hub, 224×224)
|
| 168 |
+
├── dinov2_coral_best_model_20251015_165008.pt # 64.43% (old best, same area neg, whole image)
|
| 169 |
+
└── dinov2_coral_best_model_20251016_133229.pt # 63.31% (no same area neg, whole image)
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
## License
|
| 173 |
+
|
| 174 |
+
This project is licensed under GPL-3.0.
|
| 175 |
+
|
| 176 |
+
Based on DINOv2 and DINOv3 by Meta Platforms, Inc. (Apache License 2.0).
|
dinov3_vitsplus_efficient/README(zh-tw).md
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: gpl-3.0
|
| 3 |
+
tags:
|
| 4 |
+
- coral-reef
|
| 5 |
+
- re-identification
|
| 6 |
+
- metric-learning
|
| 7 |
+
- dinov3
|
| 8 |
+
- pytorch
|
| 9 |
+
datasets:
|
| 10 |
+
- custom
|
| 11 |
+
pipeline_tag: image-feature-extraction
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# 珊瑚個體辨識:DINOv3 ViT-S+/16(高效率)
|
| 15 |
+
|
| 16 |
+
針對水下珊瑚個體辨識微調的 DINOv3 ViT-S+/16 模型。此為本專案中的**最高效率模型**,僅以約 22M 參數和約 2 小時訓練時間,達到 **81.1% N-Benchmark Top-1 準確率**。
|
| 17 |
+
|
| 18 |
+
## 模型規格
|
| 19 |
+
|
| 20 |
+
| | |
|
| 21 |
+
|---|---|
|
| 22 |
+
| **架構** | DINOv3 ViT-S+/16 (~22M 參數) |
|
| 23 |
+
| **Backbone 載入方式** | timm (`vit_small_plus_patch16_dinov3`) |
|
| 24 |
+
| **輸入尺寸** | 512 x 512 |
|
| 25 |
+
| **嵌入維度** | 768 |
|
| 26 |
+
| **Backbone 輸出維度** | 384 |
|
| 27 |
+
| **Head** | MLP (384 → 512 → 768, BatchNorm, Dropout 0.3) |
|
| 28 |
+
|
| 29 |
+
## 訓練配置
|
| 30 |
+
|
| 31 |
+
| | |
|
| 32 |
+
|---|---|
|
| 33 |
+
| **損失函數** | Triplet Loss (margin=0.3) + Hard Mining |
|
| 34 |
+
| **取樣器** | MPerClassSampler (m=2) |
|
| 35 |
+
| **批次大小** | 16(累積步數:8,等效批次:128) |
|
| 36 |
+
| **優化器** | AdamW (weight_decay=1e-4) |
|
| 37 |
+
| **梯度裁剪** | 1.0 |
|
| 38 |
+
| **Early stopping** | patience=6, delta=0.0005 |
|
| 39 |
+
| **總 epochs** | 63 |
|
| 40 |
+
| **訓練時間** | 約 2.0 小時(單 GPU) |
|
| 41 |
+
|
| 42 |
+
### 漸進式解凍(4 階段)
|
| 43 |
+
|
| 44 |
+
| 階段 | 解凍層數 | 學習率 | 最大 Epochs |
|
| 45 |
+
|------|----------|--------|-------------|
|
| 46 |
+
| 1 — 僅 Head | 0(僅 head) | 3e-4 | 20 |
|
| 47 |
+
| 2 — 最後 2 blocks | 2 | 5e-5 | 20 |
|
| 48 |
+
| 3 — 最後 4 blocks | 4 | 1.5e-5 | 15 |
|
| 49 |
+
| 4 — 最後 6 blocks | 6 | 1e-5 | 15 |
|
| 50 |
+
|
| 51 |
+
Phase 2 學習率從預設的 8e-5 降至 5e-5,避免 early stopping 過早觸發,讓 Phase 3 有更好的起點。Phase 4 進一步釋放模型容量。
|
| 52 |
+
|
| 53 |
+
## 評估結果(N-Benchmark)
|
| 54 |
+
|
| 55 |
+
跨年匹配:2022(參考集)vs 2023(查詢集),區域 37-40。
|
| 56 |
+
|
| 57 |
+
| 區域 | 查詢數 | Top-1 | Top-3 | Top-5 | 平均排名 |
|
| 58 |
+
|------|--------|-------|-------|-------|----------|
|
| 59 |
+
| 37 | 32 | 81.2% | 93.8% | 96.9% | 1.56 |
|
| 60 |
+
| 38 | 31 | 77.4% | 90.3% | 93.5% | 1.90 |
|
| 61 |
+
| 39 | 27 | 85.2% | 92.6% | 96.3% | 1.37 |
|
| 62 |
+
| 40 | 37 | 81.1% | 91.9% | 94.6% | 1.57 |
|
| 63 |
+
| **整體** | **127** | **81.1%** | **92.1%** | **95.3%** | **1.61** |
|
| 64 |
+
|
| 65 |
+
- **驗證損失**:0.1604
|
| 66 |
+
|
| 67 |
+
## 與最強模型的比較
|
| 68 |
+
|
| 69 |
+
| 指標 | 最強模型 (DINOv2 ViT-B) | 本模型 | 差距 |
|
| 70 |
+
|------|------------------------|--------|------|
|
| 71 |
+
| Top-1 | 86.6% | 81.1% | -5.5% |
|
| 72 |
+
| 參數量 | ~86.6M | ~22M | **-75%** |
|
| 73 |
+
| 訓練時間 | ~7.2h | ~2.0h | **-72%** |
|
| 74 |
+
| 模型檔案大小 | 339 MB | 112 MB | **-67%** |
|
| 75 |
+
| 推論 tokens | 1369 (patch14) | 1024 (patch16) | -25% |
|
| 76 |
+
|
| 77 |
+
## 檔案說明
|
| 78 |
+
|
| 79 |
+
| 檔案 | 說明 |
|
| 80 |
+
|------|------|
|
| 81 |
+
| `best_model_20260306_233824.pt` | 最佳 checkpoint(訓練期間最低驗證損失) |
|
| 82 |
+
| `final_model_20260306_233824.pt` | 最終 checkpoint(最後一個 epoch) |
|
| 83 |
+
| `dinov3_vitsplus_tune_02_p2lr5_4ph.yaml` | 完整訓練配置 |
|
| 84 |
+
|
| 85 |
+
## 使用方式
|
| 86 |
+
|
| 87 |
+
```python
|
| 88 |
+
import torch
|
| 89 |
+
from coral_reid.config import ExperimentConfig
|
| 90 |
+
from coral_reid.models.coral_model import CoralReIDModel
|
| 91 |
+
|
| 92 |
+
config = ExperimentConfig.from_yaml("dinov3_vitsplus_tune_02_p2lr5_4ph.yaml")
|
| 93 |
+
model = CoralReIDModel.from_config(config.backbone, config.head)
|
| 94 |
+
model.load("best_model_20260306_233824.pt", map_location="cpu")
|
| 95 |
+
model.eval()
|
| 96 |
+
|
| 97 |
+
# 提取嵌入向量
|
| 98 |
+
embedding = model(image_tensor) # (1, 768)
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
或使用獨立腳本(不需要 `coral_reid`):
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
uv run python extract_features.py \
|
| 105 |
+
--model dinov3_vitsplus_efficient/best_model_20260306_233824.pt \
|
| 106 |
+
--input /path/to/image.jpg
|
| 107 |
+
```
|
| 108 |
+
|
| 109 |
+
## 引用
|
| 110 |
+
|
| 111 |
+
本模型為珊瑚個體辨識研究的一部分,用於小琉球、綠島及東北角珊瑚礁的長期生態監測。
|
dinov3_vitsplus_efficient/README.md
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: gpl-3.0
|
| 3 |
+
tags:
|
| 4 |
+
- coral-reef
|
| 5 |
+
- re-identification
|
| 6 |
+
- metric-learning
|
| 7 |
+
- dinov3
|
| 8 |
+
- pytorch
|
| 9 |
+
datasets:
|
| 10 |
+
- custom
|
| 11 |
+
pipeline_tag: image-feature-extraction
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Coral Re-ID: DINOv3 ViT-S+/16 (Efficient)
|
| 15 |
+
|
| 16 |
+
Fine-tuned DINOv3 ViT-S+/16 for underwater coral individual re-identification. This is the **most efficient model** in the project, achieving **81.1% N-Benchmark Top-1 accuracy** with only ~22M parameters and ~2h training time.
|
| 17 |
+
|
| 18 |
+
## Model Details
|
| 19 |
+
|
| 20 |
+
| | |
|
| 21 |
+
|---|---|
|
| 22 |
+
| **Architecture** | DINOv3 ViT-S+/16 (~22M params) |
|
| 23 |
+
| **Backbone loader** | timm (`vit_small_plus_patch16_dinov3`) |
|
| 24 |
+
| **Input size** | 512 x 512 |
|
| 25 |
+
| **Embedding dim** | 768 |
|
| 26 |
+
| **Backbone output dim** | 384 |
|
| 27 |
+
| **Head** | MLP (384 → 512 → 768, BatchNorm, Dropout 0.3) |
|
| 28 |
+
|
| 29 |
+
## Training Configuration
|
| 30 |
+
|
| 31 |
+
| | |
|
| 32 |
+
|---|---|
|
| 33 |
+
| **Loss** | Triplet Loss (margin=0.3) + Hard Mining |
|
| 34 |
+
| **Sampler** | MPerClassSampler (m=2) |
|
| 35 |
+
| **Batch size** | 16 (accumulation steps: 8, effective batch: 128) |
|
| 36 |
+
| **Optimizer** | AdamW (weight_decay=1e-4) |
|
| 37 |
+
| **Gradient clipping** | 1.0 |
|
| 38 |
+
| **Early stopping** | patience=6, delta=0.0005 |
|
| 39 |
+
| **Total epochs** | 63 |
|
| 40 |
+
| **Training time** | ~2.0 hours (single GPU) |
|
| 41 |
+
|
| 42 |
+
### Progressive Unfreezing (4-phase)
|
| 43 |
+
|
| 44 |
+
| Phase | Layers | LR | Max Epochs |
|
| 45 |
+
|-------|--------|----|------------|
|
| 46 |
+
| 1 — Head only | 0 (head only) | 3e-4 | 20 |
|
| 47 |
+
| 2 — Last 2 blocks | 2 | 5e-5 | 20 |
|
| 48 |
+
| 3 — Last 4 blocks | 4 | 1.5e-5 | 15 |
|
| 49 |
+
| 4 — Last 6 blocks | 6 | 1e-5 | 15 |
|
| 50 |
+
|
| 51 |
+
Phase 2 LR was reduced from the default 8e-5 to 5e-5 to prevent early stopping from triggering too soon, giving Phase 3 a better starting point. Phase 4 then further unlocks the model's capacity.
|
| 52 |
+
|
| 53 |
+
## Evaluation Results (N-Benchmark)
|
| 54 |
+
|
| 55 |
+
Cross-year matching: 2022 (reference) vs 2023 (query), areas 37-40.
|
| 56 |
+
|
| 57 |
+
| Area | Queries | Top-1 | Top-3 | Top-5 | Avg Rank |
|
| 58 |
+
|------|---------|-------|-------|-------|----------|
|
| 59 |
+
| 37 | 32 | 81.2% | 93.8% | 96.9% | 1.56 |
|
| 60 |
+
| 38 | 31 | 77.4% | 90.3% | 93.5% | 1.90 |
|
| 61 |
+
| 39 | 27 | 85.2% | 92.6% | 96.3% | 1.37 |
|
| 62 |
+
| 40 | 37 | 81.1% | 91.9% | 94.6% | 1.57 |
|
| 63 |
+
| **Overall** | **127** | **81.1%** | **92.1%** | **95.3%** | **1.61** |
|
| 64 |
+
|
| 65 |
+
- **Val loss**: 0.1604
|
| 66 |
+
|
| 67 |
+
## Comparison with Best Model
|
| 68 |
+
|
| 69 |
+
| Metric | Best (DINOv2 ViT-B) | This model | Difference |
|
| 70 |
+
|--------|---------------------|------------|------------|
|
| 71 |
+
| Top-1 | 86.6% | 81.1% | -5.5% |
|
| 72 |
+
| Parameters | ~86.6M | ~22M | **-75%** |
|
| 73 |
+
| Training time | ~7.2h | ~2.0h | **-72%** |
|
| 74 |
+
| Model file size | 339 MB | 112 MB | **-67%** |
|
| 75 |
+
| Inference tokens | 1369 (patch14) | 1024 (patch16) | -25% |
|
| 76 |
+
|
| 77 |
+
## Files
|
| 78 |
+
|
| 79 |
+
| File | Description |
|
| 80 |
+
|------|-------------|
|
| 81 |
+
| `best_model_20260306_233824.pt` | Best checkpoint (lowest val loss during training) |
|
| 82 |
+
| `final_model_20260306_233824.pt` | Final checkpoint (last epoch) |
|
| 83 |
+
| `dinov3_vitsplus_tune_02_p2lr5_4ph.yaml` | Full training config |
|
| 84 |
+
|
| 85 |
+
## Usage
|
| 86 |
+
|
| 87 |
+
```python
|
| 88 |
+
import torch
|
| 89 |
+
from coral_reid.config import ExperimentConfig
|
| 90 |
+
from coral_reid.models.coral_model import CoralReIDModel
|
| 91 |
+
|
| 92 |
+
config = ExperimentConfig.from_yaml("dinov3_vitsplus_tune_02_p2lr5_4ph.yaml")
|
| 93 |
+
model = CoralReIDModel.from_config(config.backbone, config.head)
|
| 94 |
+
model.load("best_model_20260306_233824.pt", map_location="cpu")
|
| 95 |
+
model.eval()
|
| 96 |
+
|
| 97 |
+
# Extract embedding
|
| 98 |
+
embedding = model(image_tensor) # (1, 768)
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
Or with the standalone script (no `coral_reid` dependency):
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
uv run python extract_features.py \
|
| 105 |
+
--model dinov3_vitsplus_efficient/best_model_20260306_233824.pt \
|
| 106 |
+
--input /path/to/image.jpg
|
| 107 |
+
```
|
| 108 |
+
|
| 109 |
+
## Citation
|
| 110 |
+
|
| 111 |
+
Part of the coral re-identification research for long-term ecological monitoring at Xiaoliuqiu, Green Island, and Northeastern Taiwan.
|
dinov3_vitsplus_efficient/best_model_20260306_233824.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4f525e9298f0d7b773e4736ad15bffbbfce88f5f5431b7ca4062cc55beed51f
|
| 3 |
+
size 117200249
|
dinov3_vitsplus_efficient/dinov3_vitsplus_tune_02_p2lr5_4ph.yaml
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# DINOv3 ViT-S+ Triplet 調參 Tune-02
|
| 2 |
+
# 問題:同 Tune-01,Phase 2 早收斂 + Phase 3 高基線,且尚未嘗試深化解凍
|
| 3 |
+
# 策略:Tune-01 基礎上新增 Phase 4(6 blocks,LR=1e-5,15ep)
|
| 4 |
+
# 測試在 Phase 2/3 改善後,Phase 4 是否能進一步提升
|
| 5 |
+
# 基準(E1-13):DINOv3 ViT-S+ Triplet 74.0%
|
| 6 |
+
name: dinov3_vitsplus_tune_02_p2lr5_4ph
|
| 7 |
+
seed: 42
|
| 8 |
+
device: cuda
|
| 9 |
+
output_dir: outputs
|
| 10 |
+
|
| 11 |
+
backbone:
|
| 12 |
+
name: timm
|
| 13 |
+
variant: vit_small_plus_patch16_dinov3
|
| 14 |
+
pretrained: true
|
| 15 |
+
freeze: true
|
| 16 |
+
output_dim: 384
|
| 17 |
+
img_size: 512
|
| 18 |
+
|
| 19 |
+
head:
|
| 20 |
+
name: mlp
|
| 21 |
+
input_dim: 384
|
| 22 |
+
hidden_dim: 512
|
| 23 |
+
output_dim: 768
|
| 24 |
+
dropout: 0.3
|
| 25 |
+
use_batchnorm: true
|
| 26 |
+
|
| 27 |
+
loss:
|
| 28 |
+
name: triplet
|
| 29 |
+
margin: 0.3
|
| 30 |
+
mining_strategy: hard
|
| 31 |
+
|
| 32 |
+
data:
|
| 33 |
+
root_dirs:
|
| 34 |
+
- /home/yuc/code/data/coral
|
| 35 |
+
use_whole_image: true
|
| 36 |
+
same_area_negatives: false
|
| 37 |
+
image_size: 512
|
| 38 |
+
train_ratio: 0.7
|
| 39 |
+
val_ratio: 0.15
|
| 40 |
+
test_ratio: 0.15
|
| 41 |
+
num_workers: 4
|
| 42 |
+
|
| 43 |
+
training:
|
| 44 |
+
batch_size: 16
|
| 45 |
+
accumulation_steps: 8
|
| 46 |
+
learning_rate: 0.0003
|
| 47 |
+
weight_decay: 0.0001
|
| 48 |
+
early_stopping_patience: 6
|
| 49 |
+
early_stopping_delta: 0.0005
|
| 50 |
+
scheduler_patience: 3
|
| 51 |
+
scheduler_factor: 0.5
|
| 52 |
+
min_lr: 1.0e-06
|
| 53 |
+
gradient_clip_norm: 1.0
|
| 54 |
+
|
| 55 |
+
phases:
|
| 56 |
+
- name: 'Phase 1: Head Only'
|
| 57 |
+
epochs: 20
|
| 58 |
+
learning_rate: 3.0e-04
|
| 59 |
+
unfreeze_backbone: false
|
| 60 |
+
unfreeze_layers: 0
|
| 61 |
+
- name: 'Phase 2: Head + Last 2 Blocks (LR=5e-5, 20ep)'
|
| 62 |
+
epochs: 20
|
| 63 |
+
learning_rate: 5.0e-05
|
| 64 |
+
unfreeze_backbone: true
|
| 65 |
+
unfreeze_layers: 2
|
| 66 |
+
- name: 'Phase 3: Head + Last 4 Blocks (LR=1.5e-5)'
|
| 67 |
+
epochs: 15
|
| 68 |
+
learning_rate: 1.5e-05
|
| 69 |
+
unfreeze_backbone: true
|
| 70 |
+
unfreeze_layers: 4
|
| 71 |
+
- name: 'Phase 4: Head + Last 6 Blocks (LR=1e-5)'
|
| 72 |
+
epochs: 15
|
| 73 |
+
learning_rate: 1.0e-05
|
| 74 |
+
unfreeze_backbone: true
|
| 75 |
+
unfreeze_layers: 6
|
dinov3_vitsplus_efficient/final_model_20260306_233824.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58150a259af088911b9e3a03ad50b3feb82a9ee57ef331adc23df61f21f3d3df
|
| 3 |
+
size 117200455
|
e3_01b_dinov2_vitb_best/README(zh-tw).md
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: gpl-3.0
|
| 3 |
+
tags:
|
| 4 |
+
- coral-reef
|
| 5 |
+
- re-identification
|
| 6 |
+
- metric-learning
|
| 7 |
+
- dinov2
|
| 8 |
+
- pytorch
|
| 9 |
+
datasets:
|
| 10 |
+
- custom
|
| 11 |
+
pipeline_tag: image-feature-extraction
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# 珊瑚個體辨識:DINOv2 ViT-B/14(最佳精度)
|
| 15 |
+
|
| 16 |
+
針對水下珊瑚個體辨識微調的 DINOv2 ViT-B/14 模型。此為本專案中的**最強模型**,達到 **86.6% N-Benchmark Top-1 準確率**。
|
| 17 |
+
|
| 18 |
+
## 模型規格
|
| 19 |
+
|
| 20 |
+
| | |
|
| 21 |
+
|---|---|
|
| 22 |
+
| **架構** | DINOv2 ViT-B/14 (86.6M 參數) |
|
| 23 |
+
| **Backbone 載入方式** | timm (`vit_base_patch14_dinov2`) |
|
| 24 |
+
| **輸入尺寸** | 518 x 518 |
|
| 25 |
+
| **嵌入維度** | 1280 |
|
| 26 |
+
| **Backbone 輸出維度** | 768 |
|
| 27 |
+
| **Head** | MLP (768 → 1024 → 1280, BatchNorm, Dropout 0.3) |
|
| 28 |
+
|
| 29 |
+
## 訓練配置
|
| 30 |
+
|
| 31 |
+
| | |
|
| 32 |
+
|---|---|
|
| 33 |
+
| **損失函數** | Triplet Loss (margin=0.3) + Hard Mining |
|
| 34 |
+
| **取樣器** | AreaAwareSampler (area_ratio=0.75) |
|
| 35 |
+
| **批次大小** | 16(累積步數:8,等效批次:128) |
|
| 36 |
+
| **優化器** | AdamW (weight_decay=1e-4) |
|
| 37 |
+
| **梯度裁剪** | 1.0 |
|
| 38 |
+
| **Early stopping** | patience=6, delta=0.0005 |
|
| 39 |
+
| **總 epochs** | 56 |
|
| 40 |
+
| **訓練時間** | 約 7.2 小時(單 GPU) |
|
| 41 |
+
|
| 42 |
+
### 漸進式解凍(4 階段)
|
| 43 |
+
|
| 44 |
+
| 階段 | 解凍層數 | 學習率 | 最大 Epochs |
|
| 45 |
+
|------|----------|--------|-------------|
|
| 46 |
+
| 1 — 僅 Head | 0(僅 head) | 3e-4 | 20 |
|
| 47 |
+
| 2 — 最後 2 blocks | 2 | 8e-5 | 15 |
|
| 48 |
+
| 3 — 最後 4 blocks | 4 | 3e-5 | 12 |
|
| 49 |
+
| 4 — 最後 6 blocks | 6 | 1e-5 | 15 |
|
| 50 |
+
|
| 51 |
+
### AreaAwareSampler
|
| 52 |
+
|
| 53 |
+
每個訓練批次由 75% 同區域珊瑚與 25% 跨區域珊瑚組成。此設計對齊 N-Benchmark 的評估方式(區域內匹配),提供來自同一珊瑚礁區域中視覺上相似的更困難負樣本。
|
| 54 |
+
|
| 55 |
+
## 評估結果(N-Benchmark)
|
| 56 |
+
|
| 57 |
+
跨年匹配:2022(參考集)vs 2023(查詢集),區域 37-40。
|
| 58 |
+
|
| 59 |
+
| 區域 | 查詢數 | Top-1 | Top-3 | Top-5 | 平均排名 |
|
| 60 |
+
|------|--------|-------|-------|-------|----------|
|
| 61 |
+
| 37 | 32 | 93.8% | 96.9% | 96.9% | 1.28 |
|
| 62 |
+
| 38 | 31 | 80.6% | 100.0% | 100.0% | 1.19 |
|
| 63 |
+
| 39 | 27 | 85.2% | 92.6% | 96.3% | 1.44 |
|
| 64 |
+
| 40 | 37 | 86.5% | 97.3% | 97.3% | 1.30 |
|
| 65 |
+
| **整體** | **127** | **86.6%** | **96.9%** | **97.6%** | **1.30** |
|
| 66 |
+
|
| 67 |
+
- **最差排名**:9(所有正確匹配均在前 9 名內)
|
| 68 |
+
- **驗證損失**:0.1604
|
| 69 |
+
|
| 70 |
+
## 檔案說明
|
| 71 |
+
|
| 72 |
+
| 檔案 | 說明 |
|
| 73 |
+
|------|------|
|
| 74 |
+
| `best_model_20260308_110634.pt` | 最佳 checkpoint(訓練期間最低驗證損失) |
|
| 75 |
+
| `final_model_20260308_110634.pt` | 最終 checkpoint(最後一個 epoch) |
|
| 76 |
+
| `e3_01b_same_area_neg_075.yaml` | 完整訓練配置 |
|
| 77 |
+
|
| 78 |
+
## 使用方式
|
| 79 |
+
|
| 80 |
+
```python
|
| 81 |
+
import torch
|
| 82 |
+
from coral_reid.config import ExperimentConfig
|
| 83 |
+
from coral_reid.models.coral_model import CoralReIDModel
|
| 84 |
+
|
| 85 |
+
config = ExperimentConfig.from_yaml("e3_01b_same_area_neg_075.yaml")
|
| 86 |
+
model = CoralReIDModel.from_config(config.backbone, config.head)
|
| 87 |
+
model.load("best_model_20260308_110634.pt", map_location="cpu")
|
| 88 |
+
model.eval()
|
| 89 |
+
|
| 90 |
+
# 提取嵌入向量
|
| 91 |
+
embedding = model(image_tensor) # (1, 1280)
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
或使用獨立腳本(不需要 `coral_reid`):
|
| 95 |
+
|
| 96 |
+
```bash
|
| 97 |
+
uv run python extract_features.py \
|
| 98 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 99 |
+
--input /path/to/image.jpg
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
## 引用
|
| 103 |
+
|
| 104 |
+
本模型為珊瑚個體辨識研究的一部分,用於小琉球、綠島及東北角珊瑚礁的長期生態監測。
|
e3_01b_dinov2_vitb_best/README.md
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: gpl-3.0
|
| 3 |
+
tags:
|
| 4 |
+
- coral-reef
|
| 5 |
+
- re-identification
|
| 6 |
+
- metric-learning
|
| 7 |
+
- dinov2
|
| 8 |
+
- pytorch
|
| 9 |
+
datasets:
|
| 10 |
+
- custom
|
| 11 |
+
pipeline_tag: image-feature-extraction
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Coral Re-ID: DINOv2 ViT-B/14 (Best Accuracy)
|
| 15 |
+
|
| 16 |
+
Fine-tuned DINOv2 ViT-B/14 for underwater coral individual re-identification. This is the **strongest model** in the project, achieving **86.6% N-Benchmark Top-1 accuracy**.
|
| 17 |
+
|
| 18 |
+
## Model Details
|
| 19 |
+
|
| 20 |
+
| | |
|
| 21 |
+
|---|---|
|
| 22 |
+
| **Architecture** | DINOv2 ViT-B/14 (86.6M params) |
|
| 23 |
+
| **Backbone loader** | timm (`vit_base_patch14_dinov2`) |
|
| 24 |
+
| **Input size** | 518 x 518 |
|
| 25 |
+
| **Embedding dim** | 1280 |
|
| 26 |
+
| **Backbone output dim** | 768 |
|
| 27 |
+
| **Head** | MLP (768 → 1024 → 1280, BatchNorm, Dropout 0.3) |
|
| 28 |
+
|
| 29 |
+
## Training Configuration
|
| 30 |
+
|
| 31 |
+
| | |
|
| 32 |
+
|---|---|
|
| 33 |
+
| **Loss** | Triplet Loss (margin=0.3) + Hard Mining |
|
| 34 |
+
| **Sampler** | AreaAwareSampler (area_ratio=0.75) |
|
| 35 |
+
| **Batch size** | 16 (accumulation steps: 8, effective batch: 128) |
|
| 36 |
+
| **Optimizer** | AdamW (weight_decay=1e-4) |
|
| 37 |
+
| **Gradient clipping** | 1.0 |
|
| 38 |
+
| **Early stopping** | patience=6, delta=0.0005 |
|
| 39 |
+
| **Total epochs** | 56 |
|
| 40 |
+
| **Training time** | ~7.2 hours (single GPU) |
|
| 41 |
+
|
| 42 |
+
### Progressive Unfreezing (4-phase)
|
| 43 |
+
|
| 44 |
+
| Phase | Layers | LR | Max Epochs |
|
| 45 |
+
|-------|--------|----|------------|
|
| 46 |
+
| 1 — Head only | 0 (head only) | 3e-4 | 20 |
|
| 47 |
+
| 2 — Last 2 blocks | 2 | 8e-5 | 15 |
|
| 48 |
+
| 3 — Last 4 blocks | 4 | 3e-5 | 12 |
|
| 49 |
+
| 4 — Last 6 blocks | 6 | 1e-5 | 15 |
|
| 50 |
+
|
| 51 |
+
### AreaAwareSampler
|
| 52 |
+
|
| 53 |
+
Each training batch is composed of 75% same-area corals and 25% cross-area corals. This aligns training distribution with the N-Benchmark evaluation protocol (within-area matching), providing harder negative examples from visually similar corals in the same reef area.
|
| 54 |
+
|
| 55 |
+
## Evaluation Results (N-Benchmark)
|
| 56 |
+
|
| 57 |
+
Cross-year matching: 2022 (reference) vs 2023 (query), areas 37-40.
|
| 58 |
+
|
| 59 |
+
| Area | Queries | Top-1 | Top-3 | Top-5 | Avg Rank |
|
| 60 |
+
|------|---------|-------|-------|-------|----------|
|
| 61 |
+
| 37 | 32 | 93.8% | 96.9% | 96.9% | 1.28 |
|
| 62 |
+
| 38 | 31 | 80.6% | 100.0% | 100.0% | 1.19 |
|
| 63 |
+
| 39 | 27 | 85.2% | 92.6% | 96.3% | 1.44 |
|
| 64 |
+
| 40 | 37 | 86.5% | 97.3% | 97.3% | 1.30 |
|
| 65 |
+
| **Overall** | **127** | **86.6%** | **96.9%** | **97.6%** | **1.30** |
|
| 66 |
+
|
| 67 |
+
- **Worst rank**: 9 (all correct matches within top 9)
|
| 68 |
+
- **Val loss**: 0.1604
|
| 69 |
+
|
| 70 |
+
## Files
|
| 71 |
+
|
| 72 |
+
| File | Description |
|
| 73 |
+
|------|-------------|
|
| 74 |
+
| `best_model_20260308_110634.pt` | Best checkpoint (lowest val loss during training) |
|
| 75 |
+
| `final_model_20260308_110634.pt` | Final checkpoint (last epoch) |
|
| 76 |
+
| `e3_01b_same_area_neg_075.yaml` | Full training config |
|
| 77 |
+
|
| 78 |
+
## Usage
|
| 79 |
+
|
| 80 |
+
```python
|
| 81 |
+
import torch
|
| 82 |
+
from coral_reid.config import ExperimentConfig
|
| 83 |
+
from coral_reid.models.coral_model import CoralReIDModel
|
| 84 |
+
|
| 85 |
+
config = ExperimentConfig.from_yaml("e3_01b_same_area_neg_075.yaml")
|
| 86 |
+
model = CoralReIDModel.from_config(config.backbone, config.head)
|
| 87 |
+
model.load("best_model_20260308_110634.pt", map_location="cpu")
|
| 88 |
+
model.eval()
|
| 89 |
+
|
| 90 |
+
# Extract embedding
|
| 91 |
+
embedding = model(image_tensor) # (1, 1280)
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
Or with the standalone script (no `coral_reid` dependency):
|
| 95 |
+
|
| 96 |
+
```bash
|
| 97 |
+
uv run python extract_features.py \
|
| 98 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 99 |
+
--input /path/to/image.jpg
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
## Citation
|
| 103 |
+
|
| 104 |
+
Part of the coral re-identification research for long-term ecological monitoring at Xiaoliuqiu, Green Island, and Northeastern Taiwan.
|
e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b77d102cabc4611df9227f4fc1be9ddeb134459265259f0783ff7f28a8323cc0
|
| 3 |
+
size 354830189
|
e3_01b_dinov2_vitb_best/e3_01b_same_area_neg_075.yaml
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# E3-01b: Same Area Negatives (area_ratio=0.75)
|
| 2 |
+
# 基準:dinov2_vitb_tune_02_4ph (84.3%)
|
| 3 |
+
# 策略:AreaAwareSampler,每 batch 75% 同區域 + 25% 跨區域(更強 hard negatives)
|
| 4 |
+
name: e3_01b_same_area_neg_075
|
| 5 |
+
seed: 42
|
| 6 |
+
device: cuda
|
| 7 |
+
output_dir: outputs
|
| 8 |
+
|
| 9 |
+
backbone:
|
| 10 |
+
name: timm
|
| 11 |
+
variant: vit_base_patch14_dinov2
|
| 12 |
+
pretrained: true
|
| 13 |
+
freeze: true
|
| 14 |
+
output_dim: 768
|
| 15 |
+
img_size: 518
|
| 16 |
+
|
| 17 |
+
head:
|
| 18 |
+
name: mlp
|
| 19 |
+
input_dim: 768
|
| 20 |
+
hidden_dim: 1024
|
| 21 |
+
output_dim: 1280
|
| 22 |
+
dropout: 0.3
|
| 23 |
+
use_batchnorm: true
|
| 24 |
+
|
| 25 |
+
loss:
|
| 26 |
+
name: triplet
|
| 27 |
+
margin: 0.3
|
| 28 |
+
mining_strategy: hard
|
| 29 |
+
|
| 30 |
+
data:
|
| 31 |
+
root_dirs:
|
| 32 |
+
- /home/yuc/code/data/coral
|
| 33 |
+
use_whole_image: true
|
| 34 |
+
same_area_negatives: true
|
| 35 |
+
area_ratio: 0.75
|
| 36 |
+
image_size: 518
|
| 37 |
+
train_ratio: 0.7
|
| 38 |
+
val_ratio: 0.15
|
| 39 |
+
test_ratio: 0.15
|
| 40 |
+
num_workers: 4
|
| 41 |
+
|
| 42 |
+
training:
|
| 43 |
+
batch_size: 16
|
| 44 |
+
accumulation_steps: 8
|
| 45 |
+
learning_rate: 0.0003
|
| 46 |
+
weight_decay: 0.0001
|
| 47 |
+
early_stopping_patience: 6
|
| 48 |
+
early_stopping_delta: 0.0005
|
| 49 |
+
scheduler_patience: 3
|
| 50 |
+
scheduler_factor: 0.5
|
| 51 |
+
min_lr: 1.0e-06
|
| 52 |
+
gradient_clip_norm: 1.0
|
| 53 |
+
|
| 54 |
+
phases:
|
| 55 |
+
- name: 'Phase 1: Head Only'
|
| 56 |
+
epochs: 20
|
| 57 |
+
learning_rate: 3.0e-04
|
| 58 |
+
unfreeze_backbone: false
|
| 59 |
+
unfreeze_layers: 0
|
| 60 |
+
- name: 'Phase 2: Head + Last 2 Blocks'
|
| 61 |
+
epochs: 15
|
| 62 |
+
learning_rate: 8.0e-05
|
| 63 |
+
unfreeze_backbone: true
|
| 64 |
+
unfreeze_layers: 2
|
| 65 |
+
- name: 'Phase 3: Head + Last 4 Blocks'
|
| 66 |
+
epochs: 12
|
| 67 |
+
learning_rate: 3.0e-05
|
| 68 |
+
unfreeze_backbone: true
|
| 69 |
+
unfreeze_layers: 4
|
| 70 |
+
- name: 'Phase 4: Head + Last 6 Blocks (LR=1e-5)'
|
| 71 |
+
epochs: 15
|
| 72 |
+
learning_rate: 1.0e-05
|
| 73 |
+
unfreeze_backbone: true
|
| 74 |
+
unfreeze_layers: 6
|
e3_01b_dinov2_vitb_best/final_model_20260308_110634.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6537022312141ca144fdd3c032826a4cbcb3a0117047df210e1bc07838d5996
|
| 3 |
+
size 354830383
|
extract_features.py
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Standalone feature extraction for coral re-identification models.
|
| 2 |
+
|
| 3 |
+
Reconstructs the model architecture from checkpoint metadata (or a YAML config
|
| 4 |
+
as fallback) and loads weights without depending on the coral_reid package.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
# Extract features from a directory of images
|
| 8 |
+
uv run python extract_features.py \
|
| 9 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 10 |
+
--input /path/to/images \
|
| 11 |
+
--output features.h5
|
| 12 |
+
|
| 13 |
+
# Extract features for N-Benchmark (by area)
|
| 14 |
+
uv run python extract_features.py \
|
| 15 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 16 |
+
--input /path/to/2022sample \
|
| 17 |
+
--areas 37 38 39 40 \
|
| 18 |
+
--output features/
|
| 19 |
+
|
| 20 |
+
# Single image embedding (prints to stdout)
|
| 21 |
+
uv run python extract_features.py \
|
| 22 |
+
--model e3_01b_dinov2_vitb_best/best_model_20260308_110634.pt \
|
| 23 |
+
--input /path/to/single_image.jpg
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
from __future__ import annotations
|
| 27 |
+
|
| 28 |
+
import argparse
|
| 29 |
+
import logging
|
| 30 |
+
import os
|
| 31 |
+
from dataclasses import dataclass
|
| 32 |
+
from pathlib import Path
|
| 33 |
+
|
| 34 |
+
import h5py
|
| 35 |
+
import numpy as np
|
| 36 |
+
import timm
|
| 37 |
+
import torch
|
| 38 |
+
import torch.nn as nn
|
| 39 |
+
import torch.nn.functional as F
|
| 40 |
+
import yaml
|
| 41 |
+
from PIL import Image
|
| 42 |
+
from torchvision import transforms
|
| 43 |
+
from tqdm import tqdm
|
| 44 |
+
|
| 45 |
+
logging.basicConfig(
|
| 46 |
+
level=logging.INFO,
|
| 47 |
+
format="%(asctime)s [%(levelname)s] %(message)s",
|
| 48 |
+
)
|
| 49 |
+
logger = logging.getLogger(__name__)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# ---------------------------------------------------------------------------
|
| 53 |
+
# Configuration
|
| 54 |
+
# ---------------------------------------------------------------------------
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
@dataclass
class ModelConfig:
    """Model configuration for rebuilding the network without coral_reid.

    Populated either from the ``model_config`` dict embedded in a checkpoint
    (:meth:`from_dict`) or from a training YAML file (:meth:`from_yaml`).
    """

    # Backbone settings
    backbone_variant: str
    img_size: int
    backbone_output_dim: int

    # Projection-head settings
    hidden_dim: int
    output_dim: int
    dropout: float
    use_batchnorm: bool

    @classmethod
    def from_dict(cls, d: dict) -> ModelConfig:
        """Build a config from a flat dict (as embedded in a checkpoint)."""
        return cls(
            backbone_variant=d["backbone_variant"],
            img_size=d.get("img_size", 224),
            backbone_output_dim=d["backbone_output_dim"],
            hidden_dim=d["hidden_dim"],
            output_dim=d["output_dim"],
            dropout=d.get("dropout", 0.3),
            use_batchnorm=d.get("use_batchnorm", True),
        )

    @classmethod
    def from_yaml(cls, path: str | Path) -> ModelConfig:
        """Build a config from a training YAML file (``backbone``/``head`` sections)."""
        with open(path) as fh:
            raw = yaml.safe_load(fh)

        backbone_cfg = raw["backbone"]
        head_cfg = raw["head"]

        # Flatten the nested YAML layout into the checkpoint-style dict and
        # reuse from_dict so both paths share the same defaults.
        return cls.from_dict({
            "backbone_variant": backbone_cfg["variant"],
            "img_size": backbone_cfg.get("img_size", 224),
            "backbone_output_dim": backbone_cfg["output_dim"],
            "hidden_dim": head_cfg["hidden_dim"],
            "output_dim": head_cfg["output_dim"],
            "dropout": head_cfg.get("dropout", 0.3),
            "use_batchnorm": head_cfg.get("use_batchnorm", True),
        })
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
# ---------------------------------------------------------------------------
|
| 105 |
+
# Model Architecture (standalone reconstruction)
|
| 106 |
+
# ---------------------------------------------------------------------------
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class MLPHead(nn.Module):
    """MLP projection head that emits L2-normalized embeddings.

    Flow:
        BatchNorm1d → Dropout(0.2)
        → Linear → ReLU → Dropout → Linear → [BatchNorm1d]
        → L2 Normalize
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        dropout: float = 0.3,
        use_batchnorm: bool = True,
    ) -> None:
        super().__init__()

        # Input conditioning applied before the projection MLP.
        # NOTE: attribute names must match the checkpoint's state-dict keys.
        self.feature_processor = nn.Sequential(
            nn.BatchNorm1d(input_dim),
            nn.Dropout(p=0.2),
        )

        projection_layers: list[nn.Module] = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, output_dim),
        ]
        if use_batchnorm:
            # Optional output normalization before the L2 step.
            projection_layers.append(nn.BatchNorm1d(output_dim))
        self.projection = nn.Sequential(*projection_layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Project, then place the embedding on the unit hypersphere.
        return F.normalize(self.projection(self.feature_processor(x)), p=2, dim=1)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
class CoralReIDModel(nn.Module):
    """Coral re-identification model: a timm backbone feeding an MLP head."""

    def __init__(self, config: ModelConfig) -> None:
        super().__init__()

        # num_classes=0 strips timm's classifier so the backbone returns
        # pooled features; pretrained=False because all weights come from
        # the checkpoint loaded afterwards.
        self.backbone = timm.create_model(
            config.backbone_variant,
            pretrained=False,
            num_classes=0,
            img_size=config.img_size,
        )

        # Projection head mapping backbone features to the embedding space.
        self.head = MLPHead(
            input_dim=config.backbone_output_dim,
            hidden_dim=config.hidden_dim,
            output_dim=config.output_dim,
            dropout=config.dropout,
            use_batchnorm=config.use_batchnorm,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.head(self.backbone(x))
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def load_model(
    checkpoint_path: str | Path,
    device: str | torch.device = "cpu",
    config_path: str | Path | None = None,
) -> tuple[CoralReIDModel, ModelConfig]:
    """Load a CoralReIDModel from a checkpoint file.

    The model config is taken from the checkpoint's ``model_config`` entry
    when present; otherwise ``config_path`` (YAML) is used as a fallback.

    Args:
        checkpoint_path: Path to the .pt checkpoint file.
        device: Device to load the model on.
        config_path: Optional path to a YAML config (fallback).

    Returns:
        Tuple of (model, config).

    Raises:
        ValueError: If neither an embedded config nor ``config_path`` exists.
    """
    # weights_only=False is needed because the checkpoint also stores a
    # config dict — only load checkpoints from trusted sources.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

    checkpoint_is_dict = isinstance(checkpoint, dict)

    # Resolve config: checkpoint-embedded takes precedence over the YAML.
    if checkpoint_is_dict and "model_config" in checkpoint:
        config = ModelConfig.from_dict(checkpoint["model_config"])
    elif config_path is not None:
        config = ModelConfig.from_yaml(config_path)
    else:
        raise ValueError(
            "Checkpoint does not contain model_config and no --config provided. "
            "Use embed_config.py to add config to the checkpoint, or pass --config."
        )

    model = CoralReIDModel(config)

    # Either a wrapped training checkpoint or a raw state_dict.
    if checkpoint_is_dict and "model_state_dict" in checkpoint:
        raw_state = checkpoint["model_state_dict"]
    else:
        raw_state = checkpoint

    # The training code wraps timm ("backbone.model.*") while this script
    # instantiates timm directly ("backbone.*"); strip the extra level.
    remapped: dict[str, torch.Tensor] = {}
    for key, tensor in raw_state.items():
        if key.startswith("backbone.model."):
            remapped[key.replace("backbone.model.", "backbone.", 1)] = tensor
        else:
            remapped[key] = tensor

    model.load_state_dict(remapped)
    model.to(device)
    model.eval()

    logger.info(
        f"Loaded model: {config.backbone_variant} "
        f"({config.img_size}px, {config.output_dim}d embedding)"
    )
    return model, config
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
# ---------------------------------------------------------------------------
|
| 239 |
+
# Inference Transforms
|
| 240 |
+
# ---------------------------------------------------------------------------
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def get_inference_transforms(image_size: int) -> transforms.Compose:
    """Build the deterministic inference pipeline matching training.

    Bicubic resize to a square, tensor conversion, then ImageNet
    mean/std normalization.
    """
    resize = transforms.Resize(
        (image_size, image_size),
        interpolation=transforms.InterpolationMode.BICUBIC,
    )
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    return transforms.Compose([resize, to_tensor, normalize])
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
# ---------------------------------------------------------------------------
|
| 259 |
+
# Feature Extraction
|
| 260 |
+
# ---------------------------------------------------------------------------
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
@torch.no_grad()
def extract_single(
    model: CoralReIDModel,
    img_path: str | Path,
    transform: transforms.Compose,
    device: str | torch.device,
) -> np.ndarray | None:
    """Return the embedding for one image, or None if it cannot be processed."""
    try:
        image = Image.open(img_path).convert("RGB")
        batch = transform(image).unsqueeze(0).to(device)
        # Flatten (1, D) down to a 1-D vector for convenience.
        return model(batch).cpu().numpy().flatten()
    except Exception as e:
        logger.warning(f"Failed to process {img_path}: {e}")
        return None
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
@torch.no_grad()
def extract_directory(
    model: CoralReIDModel,
    directory: str | Path,
    transform: transforms.Compose,
    device: str | torch.device,
    batch_size: int = 32,
) -> tuple[np.ndarray, list[str]]:
    """Extract features for every .jpg/.jpeg/.png image in a directory.

    Images are processed in batches of ``batch_size``; unreadable files
    are logged and skipped.

    Returns:
        Tuple of (features array [N, D], list of coral names taken from
        the file stems).
    """
    directory = Path(directory)
    image_files = sorted(
        f
        for f in os.listdir(directory)
        if f.lower().endswith((".jpg", ".jpeg", ".png"))
    )

    if not image_files:
        logger.warning(f"No images found in {directory}")
        return np.array([]), []

    feature_chunks: list[np.ndarray] = []
    coral_names: list[str] = []

    for start in tqdm(range(0, len(image_files), batch_size), desc=str(directory)):
        tensors: list[torch.Tensor] = []
        loaded_names: list[str] = []

        for fname in image_files[start : start + batch_size]:
            try:
                image = Image.open(directory / fname).convert("RGB")
                tensors.append(transform(image))
                # Coral name = filename without extension.
                loaded_names.append(os.path.splitext(fname)[0])
            except Exception as e:
                logger.warning(f"Skipping {fname}: {e}")

        if tensors:
            batch = torch.stack(tensors).to(device)
            feature_chunks.append(model(batch).cpu().numpy())
            coral_names.extend(loaded_names)

    if feature_chunks:
        return np.concatenate(feature_chunks, axis=0), coral_names
    return np.array([]), coral_names
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
def save_features_h5(
    path: str | Path,
    features: np.ndarray,
    coral_names: list[str],
    metadata: dict[str, str | int | float] | None = None,
) -> None:
    """Write a feature matrix and its coral names (plus optional metadata) to HDF5.

    Parent directories of *path* are created as needed. Non-None metadata
    values are stored as file attributes.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    encoded_names = [name.encode("utf-8") for name in coral_names]
    with h5py.File(path, "w") as handle:
        handle.create_dataset("features", data=features)
        handle.create_dataset("coral_names", data=encoded_names)
        # Shape info as attributes so files can be inspected without loading data.
        handle.attrs["feature_dim"] = features.shape[1] if len(features.shape) > 1 else 0
        handle.attrs["num_samples"] = features.shape[0]

        for key, value in (metadata or {}).items():
            if value is not None:
                handle.attrs[key] = value

    logger.info(f"Saved {len(coral_names)} features to {path}")
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
# ---------------------------------------------------------------------------
|
| 363 |
+
# CLI
|
| 364 |
+
# ---------------------------------------------------------------------------
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def parse_args() -> argparse.Namespace:
    """Parse command-line options for standalone feature extraction."""
    p = argparse.ArgumentParser(
        description="Standalone feature extraction for coral re-identification models",
    )
    # Model / config.
    p.add_argument("--model", required=True, help="Path to model checkpoint (.pt)")
    p.add_argument(
        "--config",
        default=None,
        help="Path to YAML config file (optional if config is embedded in checkpoint)",
    )
    # Input / output locations.
    p.add_argument("--input", required=True, help="Path to image file or directory")
    p.add_argument(
        "--output",
        default=None,
        help="Output path (.h5 file or directory for area mode)",
    )
    # Area-mode options.
    p.add_argument(
        "--areas",
        nargs="+",
        default=None,
        help="Area IDs for N-Benchmark extraction (e.g., 37 38 39 40)",
    )
    p.add_argument(
        "--year",
        default=None,
        help="Year label for area mode filenames (e.g., 2022)",
    )
    # Runtime options.
    p.add_argument(
        "--batch-size",
        type=int,
        default=32,
        help="Batch size for extraction (default: 32)",
    )
    p.add_argument(
        "--device",
        default="cuda" if torch.cuda.is_available() else "cpu",
        help="Device (default: cuda if available)",
    )
    return p.parse_args()
|
| 414 |
+
|
| 415 |
+
|
| 416 |
+
def main() -> None:
    """CLI entry point: dispatch to single-image, area, or directory extraction.

    Modes are checked in order: a single image file, N-Benchmark area
    subdirectories (``--areas``), then a flat image directory. A missing
    input path is reported as an error.
    """
    args = parse_args()
    input_path = Path(args.input)

    model, config = load_model(args.model, args.device, config_path=args.config)
    transform = get_inference_transforms(config.img_size)

    if input_path.is_file():
        # --- Single image mode ---
        vec = extract_single(model, input_path, transform, args.device)
        if vec is not None:
            print(f"Image: {input_path.name}")
            print(f"Embedding shape: {vec.shape}")
            print(f"Embedding norm: {np.linalg.norm(vec):.4f}")
            if not args.output:
                print(f"Embedding: {vec[:8]}... (first 8 dims)")
            else:
                np.save(args.output, vec)
                logger.info(f"Saved embedding to {args.output}")
        return

    if args.areas:
        # --- Area mode (N-Benchmark style) ---
        output_dir = Path(args.output) if args.output else Path("features")
        output_dir.mkdir(parents=True, exist_ok=True)

        for area_id in args.areas:
            area_dir = input_path / area_id
            if not area_dir.exists():
                logger.warning(f"Area directory not found: {area_dir}")
                continue

            feats, names = extract_directory(
                model, area_dir, transform, args.device, args.batch_size,
            )
            if len(feats) == 0:
                continue

            # Year label is optional and only affects the output filename.
            if args.year:
                stem = f"features_{args.year}_{area_id}_whole"
            else:
                stem = f"features_{area_id}_whole"
            save_features_h5(
                output_dir / f"{stem}.h5",
                feats,
                names,
                {"area_id": area_id, "source_dir": str(area_dir)},
            )
        return

    if input_path.is_dir():
        # --- Directory mode ---
        feats, names = extract_directory(
            model, input_path, transform, args.device, args.batch_size,
        )
        if len(feats) == 0:
            logger.error("No features extracted")
        else:
            save_features_h5(
                args.output or "features.h5",
                feats,
                names,
                {"source_dir": str(input_path)},
            )
        return

    logger.error(f"Input path not found: {input_path}")
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
# Script entry point: run the extraction CLI when invoked directly.
if __name__ == "__main__":
    main()
|
legacy/dinov2_coral_best_model_20251015_165008.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6eb998eb8a37c7a539a36c6b32485beb9ff3d8c1fca5d5aacfa8cb9aefbfd47b
|
| 3 |
+
size 354828824
|
legacy/dinov2_coral_best_model_20251016_133229.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3f8af48b28b22c23591f68a96144ce1fc4c3c597a5248476d59e5e0a2abaf26
|
| 3 |
+
size 354828824
|
pyproject.toml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "coral-models"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Standalone inference for coral re-identification models"
|
| 5 |
+
requires-python = ">=3.10"
|
| 6 |
+
license = "GPL-3.0-or-later"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"torch>=2.0.0",
|
| 9 |
+
"torchvision>=0.15.0",
|
| 10 |
+
"timm>=1.0.0",
|
| 11 |
+
"h5py>=3.9.0",
|
| 12 |
+
"numpy>=1.24.0",
|
| 13 |
+
"pillow>=10.0.0",
|
| 14 |
+
"pyyaml>=6.0",
|
| 15 |
+
"tqdm>=4.65.0",
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
[tool.uv]
|
| 19 |
+
package = false
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|