#!/usr/bin/env python3
# Listing metadata from the web file viewer: file size 8,710 bytes; id c2d8817.
"""
上传 NeuroScan AI 项目到 Hugging Face Hub
包括:
1. 代码仓库 (neuroscan-ai)
2. 模型仓库 (neuroscan-ai-models)
3. 数据集仓库 (neuroscan-ai-dataset)
"""
import os
import sys
from pathlib import Path
from huggingface_hub import HfApi, create_repo, upload_folder, upload_file
# Project root: the directory two levels above this script's resolved path.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Hugging Face access token, read from the HF_TOKEN environment variable.
# A missing or empty value aborts at import time.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    raise ValueError("请设置环境变量 HF_TOKEN")
def get_api() -> "HfApi":
    """Return an ``HfApi`` client constructed with the module-level ``HF_TOKEN``."""
    client = HfApi(token=HF_TOKEN)
    return client
def upload_code_repo():
    """Publish the project source tree to the ``cyd0806/neuroscan-ai`` Hub repo.

    The repository is created as a public ``model``-type repo if it does not
    exist yet; a creation failure is only reported, and the upload is still
    attempted. Everything under ``PROJECT_ROOT`` is then uploaded in a single
    commit, minus the paths matched by ``ignore_patterns`` below.

    Raises:
        Exception: any error raised by ``upload_folder`` is re-raised after a
            failure message has been printed.
    """
    print("\n" + "=" * 60)
    print("📦 上传代码仓库: neuroscan-ai")
    print("=" * 60)

    repo_id = "cyd0806/neuroscan-ai"

    # Create the repository (best effort: exist_ok makes re-runs harmless).
    try:
        create_repo(
            repo_id=repo_id,
            token=HF_TOKEN,
            repo_type="model",
            exist_ok=True,
            private=False,
        )
        print(f"✅ 仓库已创建/存在: {repo_id}")
    except Exception as e:
        print(f"⚠️ 创建仓库: {e}")

    # Files and directories to exclude from the upload.
    # The Hub client matches these fnmatch-style against each file's path
    # relative to the uploaded folder, and ``*`` also spans ``/``. A bare name
    # such as ``.env`` therefore only matches a file at the top level, so the
    # nested variants are listed explicitly to keep secrets and bytecode
    # caches in sub-directories out of the public repository.
    ignore_patterns = [
        "*.pyc",
        "__pycache__",
        "__pycache__/*",
        "*/__pycache__/*",
        ".git",
        ".gitignore",
        "data/raw/*",
        "data/processed/*",
        "data/cache/*",
        "models/monai_bundles/*",
        "models/ollama/*",
        "logs/*",
        "output/*",
        "*.nii",
        "*.nii.gz",
        "*.pt",
        "*.pth",
        "*.ckpt",
        "*.bin",
        "*.safetensors",
        ".env",
        "*/.env",
        "*.log",
        "*.tmp",
    ]

    # Upload the code.
    print("📤 上传代码文件...")
    try:
        upload_folder(
            folder_path=str(PROJECT_ROOT),
            repo_id=repo_id,
            repo_type="model",
            token=HF_TOKEN,
            ignore_patterns=ignore_patterns,
            commit_message="Upload NeuroScan AI code",
        )
        print(f"✅ 代码上传完成: https://huggingface.co/{repo_id}")
    except Exception as e:
        print(f"❌ 上传失败: {e}")
        raise
def upload_models_repo():
    """Publish the MONAI bundles to the ``cyd0806/neuroscan-ai-models`` Hub repo.

    Does nothing (beyond printing a notice) when
    ``PROJECT_ROOT/models/monai_bundles`` does not exist. Otherwise the
    repository is created as a public ``model``-type repo if needed (a creation
    failure is only reported), a model-card ``README.md`` is written into the
    bundle directory, replacing any file of that name, and the whole directory
    is uploaded in one commit.

    Raises:
        OSError: if the README cannot be written.
        Exception: any error raised by ``upload_folder`` is re-raised after a
            failure message has been printed.
    """
    print("\n" + "=" * 60)
    print("🧠 上传模型仓库: neuroscan-ai-models")
    print("=" * 60)

    repo_id = "cyd0806/neuroscan-ai-models"
    models_dir = PROJECT_ROOT / "models" / "monai_bundles"
    if not models_dir.exists():
        print("⚠️ 模型目录不存在,跳过")
        return

    # Create the repository (best effort: exist_ok makes re-runs harmless).
    try:
        create_repo(
            repo_id=repo_id,
            token=HF_TOKEN,
            repo_type="model",
            exist_ok=True,
            private=False,
        )
        print(f"✅ 仓库已创建/存在: {repo_id}")
    except Exception as e:
        print(f"⚠️ 创建仓库: {e}")

    # Model card shown on the repository page.
    readme_content = """---
license: apache-2.0
tags:
- medical-imaging
- segmentation
- monai
- ct-scan
---
# NeuroScan AI - Medical Imaging Models
This repository contains pretrained models for NeuroScan AI medical imaging analysis platform.
## Models
### wholeBody_ct_segmentation
- **Description**: Whole body CT segmentation model
- **Framework**: MONAI
- **Organs**: 104 anatomical structures
- **Input**: CT scan (NIfTI format)
## Usage
```python
from monai.bundle import download
# Download the model
download(name="wholeBody_ct_segmentation", bundle_dir="./models")
```
## License
Apache 2.0
## Citation
If you use these models, please cite NeuroScan AI project.
"""
    readme_path = models_dir / "README.md"
    # Write as UTF-8 explicitly so the file does not depend on the platform's
    # locale default encoding.
    readme_path.write_text(readme_content, encoding="utf-8")

    # Upload the models.
    print("📤 上传模型文件...")
    try:
        upload_folder(
            folder_path=str(models_dir),
            repo_id=repo_id,
            repo_type="model",
            token=HF_TOKEN,
            commit_message="Upload MONAI segmentation models",
        )
        print(f"✅ 模型上传完成: https://huggingface.co/{repo_id}")
    except Exception as e:
        print(f"❌ 上传失败: {e}")
        raise
def upload_dataset_repo():
    """Publish the ``data`` directory to the ``cyd0806/neuroscan-ai-dataset`` Hub repo.

    Does nothing (beyond printing a notice) when ``PROJECT_ROOT/data/raw`` does
    not exist. Otherwise the repository is created as a public ``dataset``-type
    repo if needed (a creation failure is only reported), a dataset-card
    ``README.md`` is written into ``PROJECT_ROOT/data``, replacing any file of
    that name, and that directory is uploaded in one commit, skipping
    ``cache/*``, ``*.log`` and ``*.tmp``.

    Raises:
        OSError: if the README cannot be written.
        Exception: any error raised by ``upload_folder`` is re-raised after a
            failure message has been printed.
    """
    print("\n" + "=" * 60)
    print("📊 上传数据集仓库: neuroscan-ai-dataset")
    print("=" * 60)

    repo_id = "cyd0806/neuroscan-ai-dataset"

    # Check that the raw data directory is present.
    raw_dir = PROJECT_ROOT / "data" / "raw"
    if not raw_dir.exists():
        print("⚠️ 数据目录不存在,跳过")
        return
    # The upload root is the parent of raw/, i.e. the whole data/ directory.
    data_dir = raw_dir.parent

    # Create the dataset repository (best effort: exist_ok makes re-runs harmless).
    try:
        create_repo(
            repo_id=repo_id,
            token=HF_TOKEN,
            repo_type="dataset",
            exist_ok=True,
            private=False,
        )
        print(f"✅ 仓库已创建/存在: {repo_id}")
    except Exception as e:
        print(f"⚠️ 创建仓库: {e}")

    # Dataset card shown on the repository page. The usage snippet interpolates
    # ``repo_id`` so it always names the repository this function uploads to.
    # NOTE(review): size_categories says 1K<n<10K while the card lists 20 pairs;
    # confirm which is intended.
    readme_content = f"""---
license: cc-by-nc-4.0
task_categories:
- image-segmentation
tags:
- medical-imaging
- ct-scan
- lung
- registration
size_categories:
- 1K<n<10K
---
# NeuroScan AI - Medical Imaging Dataset
This dataset contains sample medical imaging data for the NeuroScan AI platform.
## Dataset Description
### Learn2Reg Lung CT
- **Source**: [Learn2Reg Challenge](https://zenodo.org/record/3835682)
- **Description**: Paired inspiration and expiration lung CT scans
- **Format**: NIfTI (.nii.gz)
- **Cases**: 20 pairs
- **License**: CC BY-NC 4.0
## Usage
```python
# Download using huggingface_hub
from huggingface_hub import snapshot_download
snapshot_download(
repo_id="{repo_id}",
repo_type="dataset",
local_dir="./data"
)
```
## Data Structure
```
data/
├── raw/
│ ├── training/
│ │ ├── scans/
│ │ │ ├── case_001_insp.nii.gz
│ │ │ ├── case_001_exp.nii.gz
│ │ │ └── ...
│ │ └── lungMasks/
│ │ └── ...
│ └── Learn2Reg_training.zip
└── processed/
└── real_lung_001/
├── baseline.nii.gz
├── followup.nii.gz
└── ...
```
## License
CC BY-NC 4.0 (Non-commercial use only)
## Citation
Please cite the original Learn2Reg challenge if you use this data.
"""
    readme_path = data_dir / "README.md"
    # The card contains box-drawing characters; write it as UTF-8 explicitly so
    # a non-UTF-8 locale default cannot raise UnicodeEncodeError.
    readme_path.write_text(readme_content, encoding="utf-8")

    # Upload the dataset.
    print("📤 上传数据集文件(这可能需要较长时间)...")
    try:
        upload_folder(
            folder_path=str(data_dir),
            repo_id=repo_id,
            repo_type="dataset",
            token=HF_TOKEN,
            ignore_patterns=["cache/*", "*.log", "*.tmp"],
            commit_message="Upload Learn2Reg lung CT dataset",
        )
        print(f"✅ 数据集上传完成: https://huggingface.co/datasets/{repo_id}")
    except Exception as e:
        print(f"❌ 上传失败: {e}")
        raise
def main():
    """Command-line entry point: verify the token, then run the chosen uploads.

    ``--code``, ``--models`` and ``--dataset`` each select one upload. ``--all``,
    or passing no selection flag at all, runs all three in that order.

    Returns:
        int: ``1`` when the token check fails, ``0`` once the selected uploads
        have finished.
    """
    import argparse

    cli = argparse.ArgumentParser(description="上传 NeuroScan AI 到 Hugging Face Hub")
    for flag, help_text in (
        ("--code", "上传代码仓库"),
        ("--models", "上传模型仓库"),
        ("--dataset", "上传数据集仓库"),
        ("--all", "上传所有"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    opts = cli.parse_args()

    banner = "=" * 60
    print(banner)
    print("🚀 NeuroScan AI -> Hugging Face Hub 上传工具")
    print(banner)

    # Validate the token by asking the Hub who we are.
    try:
        identity = get_api().whoami()
        print(f"✅ 已登录: {identity['name']}")
    except Exception as err:
        print(f"❌ Token 验证失败: {err}")
        return 1

    # With no explicit selection, behave as if --all had been given.
    nothing_selected = not (opts.code or opts.models or opts.dataset)
    run_everything = opts.all or nothing_selected

    if run_everything or opts.code:
        upload_code_repo()
    if run_everything or opts.models:
        upload_models_repo()
    if run_everything or opts.dataset:
        upload_dataset_repo()

    print("\n" + banner)
    print("🎉 上传完成!")
    print(banner)
    print("\n仓库地址:")
    print(" 📦 代码: https://huggingface.co/cyd0806/neuroscan-ai")
    print(" 🧠 模型: https://huggingface.co/cyd0806/neuroscan-ai-models")
    print(" 📊 数据: https://huggingface.co/datasets/cyd0806/neuroscan-ai-dataset")
    return 0
if __name__ == "__main__":
    # Use main()'s integer return value as the process exit status.
    raise SystemExit(main())
|