Spaces:
Running
Running
| """Helpers for Hugging Face Hub authentication, runtime config, and uploads.""" | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import shutil | |
| import socket | |
| import tempfile | |
| from pathlib import Path | |
| from typing import Any | |
# Default Hugging Face dataset repository referenced by the helpers below.
HF_DATASET_REPO = "arudaev/chest-xray-14-320"

# Dataset revision to pin. The CHEXVISION_DATASET_REVISION environment
# variable, when set, takes precedence over the built-in commit hash.
HF_DATASET_REVISION = os.environ.get(
    "CHEXVISION_DATASET_REVISION", "44443e6ee968b3c6094b63f14a27698c40b50680"
)

# NIH Chest X-ray14 pathology labels in canonical order
PATHOLOGY_LABELS = [
    "Atelectasis",
    "Cardiomegaly",
    "Effusion",
    "Infiltration",
    "Mass",
    "Nodule",
    "Pneumonia",
    "Pneumothorax",
    "Consolidation",
    "Edema",
    "Emphysema",
    "Fibrosis",
    "Pleural_Thickening",
    "Hernia",
]
def _load_dotenv_if_available() -> None:
    """Call ``dotenv.load_dotenv()`` when python-dotenv can be imported.

    The dependency is optional: if the import fails the function silently
    does nothing, so callers never need to guard the call themselves.
    """
    try:
        from dotenv import load_dotenv as _apply_dotenv

        _apply_dotenv()
    except ImportError:
        # python-dotenv is not installed; rely on the existing environment.
        pass
def _set_hf_token_env(token: str) -> str:
    """Export *token* under both HF token variable names and return it unchanged.

    Writing ``HF_TOKEN`` and ``HUGGING_FACE_HUB_TOKEN`` together keeps the two
    names in sync for whatever reads them later in the process.
    """
    for variable in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        os.environ[variable] = token
    return token
def load_hf_token(required: bool = False) -> str | None:
    """Resolve an HF token from env vars, project .env, Kaggle dataset, or Kaggle secrets.

    Resolution order (first non-empty value wins):

    1. Environment variables (HF_TOKEN / legacy aliases), read after ``.env``
       has been loaded when python-dotenv is installed.
    2. ``hf_token.txt`` from the private Kaggle dataset ``hlexnc/chexvision-secrets``
       — preferred automated path; attach it via dataset_sources in
       kernel-metadata.json so every API-pushed kernel gets it without manual UI
       steps. Both ``/kaggle/input/datasets/hlexnc/chexvision-secrets/`` and
       ``/kaggle/input/chexvision-secrets/`` are tried.
    3. Kaggle UserSecretsClient — works only for interactive sessions, kept as fallback.

    A token file that exists but cannot be read (permissions, wrong file type,
    invalid UTF-8) is reported on stdout and skipped, so the later fallbacks
    still get a chance to run.

    Args:
        required: When true, raise instead of returning ``None`` if no token
            could be resolved.

    Returns:
        The stripped token, which is also exported to the HF token environment
        variables, or ``None`` when nothing was found and ``required`` is false.

    Raises:
        RuntimeError: If ``required`` is true and no token was found. Inside a
            Kaggle kernel (``KAGGLE_KERNEL_RUN_TYPE`` set) the message includes
            Kaggle-specific setup instructions and any Kaggle Secrets error.
    """
    _load_dotenv_if_available()
    kaggle_secret_error: str | None = None

    # 1. Environment variables (highest priority; set by .env, GitHub Actions, etc.)
    token_names = ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN", "HUGGING_FACE_HUB_TOKEN")
    for name in token_names:
        token = os.environ.get(name, "").strip()
        if token:
            return _set_hf_token_env(token)

    # 2. Token file from an attached Kaggle dataset source.
    #    Kaggle mounts dataset_sources under two possible paths depending on
    #    the runtime version — check both so old and new kernels both work.
    for token_file in (
        Path("/kaggle/input/datasets/hlexnc/chexvision-secrets/hf_token.txt"),
        Path("/kaggle/input/chexvision-secrets/hf_token.txt"),
    ):
        # EAFP: read directly instead of exists()+read so that a file which
        # vanishes or is unreadable cannot abort the whole resolution chain.
        try:
            token = token_file.read_text(encoding="utf-8").strip()
        except (FileNotFoundError, NotADirectoryError):
            continue  # Dataset is not mounted at this path.
        except (OSError, UnicodeDecodeError) as exc:
            print(
                f"[hub] Could not read HF token file {token_file}: "
                f"{type(exc).__name__}: {exc}"
            )
            continue
        if token:
            print(f"[hub] Loaded HF_TOKEN from Kaggle dataset source: {token_file}")
            return _set_hf_token_env(token)

    # 3. Kaggle UserSecretsClient — interactive sessions only (fallback).
    #    Any failure (module missing, secret not enabled, network error) is
    #    deliberately tolerated and only surfaced in the error message below.
    try:
        from kaggle_secrets import UserSecretsClient

        token = UserSecretsClient().get_secret("HF_TOKEN").strip()
    except Exception as exc:
        token = ""
        kaggle_secret_error = f"{type(exc).__name__}: {exc}"
    if token:
        return _set_hf_token_env(token)

    if required:
        if os.environ.get("KAGGLE_KERNEL_RUN_TYPE"):
            detail = f" Kaggle reported: {kaggle_secret_error}" if kaggle_secret_error else ""
            raise RuntimeError(
                "HF_TOKEN not found. Preferred fix: create a private Kaggle dataset "
                "'hlexnc/chexvision-secrets' with a file 'hf_token.txt' containing your "
                "HF token, then add it to dataset_sources in kernel-metadata.json. "
                f"Alternatively enable HF_TOKEN in Kaggle Secrets (interactive only).{detail}"
            )
        raise RuntimeError(
            "HF_TOKEN not found. Set it in .env, export it in the environment, "
            "or add it to Kaggle Secrets."
        )
    return None
def configure_hf_runtime(
    token: str | None = None,
    *,
    required_token: bool = False,
    check_dns: bool = False,
) -> str | None:
    """Prepare HF-related environment variables; call before importing HF client libraries.

    Steps, in order:

    * Use ``token`` if given, otherwise resolve one via ``load_hf_token``.
    * Inside a Kaggle kernel (``KAGGLE_KERNEL_RUN_TYPE`` set) default ``HF_HOME``
      to ``/kaggle/working/hf_home`` unless it is already defined.
    * Create the ``HF_HOME`` directory on a best-effort basis.
    * Fill in defaults for several ``HF_HUB_*`` settings without overriding
      values that are already present.
    * Export the resolved token, if any.
    * Optionally verify that ``huggingface.co`` resolves.

    Args:
        token: Explicit token; skips the lookup when non-empty.
        required_token: Forwarded to ``load_hf_token`` as ``required``.
        check_dns: When true, perform a DNS lookup for ``huggingface.co``.

    Returns:
        The resolved token, or ``None`` when none is available.

    Raises:
        RuntimeError: If ``check_dns`` is true and the hostname cannot be resolved.
    """
    resolved_token = token if token else load_hf_token(required=required_token)

    running_on_kaggle = bool(os.environ.get("KAGGLE_KERNEL_RUN_TYPE"))
    if running_on_kaggle and "HF_HOME" not in os.environ:
        os.environ["HF_HOME"] = "/kaggle/working/hf_home"

    cache_root = os.environ.get("HF_HOME", "").strip()
    if cache_root:
        try:
            Path(cache_root).mkdir(parents=True, exist_ok=True)
        except OSError:
            # Best-effort only: the location may not be writable outside a
            # real Kaggle kernel.
            pass

    hub_defaults = (
        ("HF_HUB_DISABLE_XET", "1"),
        ("HF_HUB_ETAG_TIMEOUT", "30"),
        ("HF_HUB_DOWNLOAD_TIMEOUT", "300"),
        ("HF_HUB_VERBOSITY", "info"),
    )
    for variable, fallback in hub_defaults:
        os.environ.setdefault(variable, fallback)

    if resolved_token:
        _set_hf_token_env(resolved_token)

    if not check_dns:
        return resolved_token

    try:
        socket.getaddrinfo("huggingface.co", 443)
    except OSError as exc:
        raise RuntimeError(
            "Failed to resolve huggingface.co from the current runtime. "
            "Check Kaggle internet access or a platform-side DNS issue."
        ) from exc
    return resolved_token
def _safe_metric(history: dict[str, Any] | None, key: str) -> float | None:
    """Return the largest usable numeric value recorded for a metric history key.

    Entries that cannot be converted to ``float`` (for example ``None`` for an
    epoch where the metric was not computed) and NaN entries are ignored, so a
    single bad epoch neither raises nor makes the result depend on list order.

    Args:
        history: Mapping of metric name to a list of per-epoch values, or ``None``.
        key: Metric name to look up.

    Returns:
        The maximum value as a ``float``, or ``None`` when the history is
        missing/empty, the entry is not a non-empty list, or no entry is usable.
    """
    if not history:
        return None
    values = history.get(key, [])
    if not isinstance(values, list) or not values:
        return None

    usable: list[float] = []
    for raw in values:
        try:
            number = float(raw)
        except (TypeError, ValueError):
            continue
        # NaN is the only float that compares unequal to itself.
        if number == number:
            usable.append(number)
    return max(usable) if usable else None
def _architecture_summary(config: dict[str, Any]) -> str:
    """Describe the configured model in one sentence.

    ``config["model"]["type"] == "densenet"`` yields a fixed DenseNet-121
    sentence. Any other type is described as the custom residual CNN, using
    ``block_config`` and ``use_se`` from ``config["model"]["architecture"]``
    (defaults: ``[3, 4, 6, 3]`` and ``True``).
    """
    model_section = config.get("model", {})
    if model_section.get("type", "") == "densenet":
        return "DenseNet-121 transfer learning with a shared feature layer and dual classification heads."

    arch_section = model_section.get("architecture", {})
    depth = arch_section.get("block_config", [3, 4, 6, 3])
    if arch_section.get("use_se", True):
        attention = " with Squeeze-Excitation channel attention"
    else:
        attention = ""

    head = f"Custom residual CNN{attention} (depth {depth}) trained from scratch "
    tail = "with shared features and dual classification heads."
    return head + tail
def _render_pipeline_diagram() -> str:
    """Return the fenced Mermaid flowchart of the data→train→upload pipeline.

    The diagram is static text: it takes no configuration, so the figures in
    the node labels are fixed. ``\\n`` inside a label is a real line break in
    the returned string.
    """
    diagram_lines = (
        "```mermaid",
        "flowchart TD",
        'DS[("🗄️ arudaev/chest-xray-14-320\n112,120 images · 36 shards · ~7.97 GB")]',
        'DS -->|snapshot_download| PREP["📂 data/images · data/labels.csv\ntrain 78,468 · val 11,210 · test 22,442"]',
        'PREP --> AUG["Augmentation Pipeline\nHFlip · Rotate±15° · RandomAffine\nColorJitter · GaussianBlur · RandomErasing"]',
        'AUG --> FWD["⚡ Model Forward Pass\ntorch.cuda.amp.autocast · fp16"]',
        'FWD --> ML["multilabel_logits B×14\nWeightedBCE + pos_weight · 14 classes"]',
        'FWD --> BIN["binary_logits B×1\nBCE · Normal vs. Abnormal"]',
        'ML --> LOSS["Combined Loss\n1.0 × multilabel + 0.5 × binary"]',
        "BIN --> LOSS",
        'LOSS --> BACK["Backward · Grad Clip 1.0\nGradient Accumulation ×4 · eff. batch 96"]',
        'BACK --> OPT["AdamW · CosineAnnealingLR\nearly stop patience = 15"]',
        'OPT -->|"↑ best val AUC-ROC"| BEST["💾 Best Checkpoint\nmodel_state · best_val_metrics · config"]',
        'BEST -->|upload_model_artifacts| HUB["🤗 HF Hub\ncheckpoint · history.json · model card"]',
        "```",
    )
    return "\n".join(diagram_lines)
def _render_scratch_architecture(config: dict[str, Any]) -> str:
    """Mermaid architecture diagram for CheXVisionScratch.

    Reads ``block_config`` and ``use_se`` from ``config["model"]["architecture"]``.
    The diagram always shows four stages: extra ``block_config`` entries are
    ignored, and a missing entry falls back to the default for that same stage
    position (defaults ``[3, 4, 6, 3]``). ``block_config`` may be a list or a
    tuple; a missing, ``None`` or empty value means "all defaults".

    Args:
        config: Training configuration dictionary.

    Returns:
        A fenced ```` ```mermaid ```` block as a string.
    """
    arch = config.get("model", {}).get("architecture", {})
    default_blocks = [3, 4, 6, 3]
    # list(...) so tuple-valued configs can be concatenated with the defaults.
    blocks = list(arch.get("block_config") or default_blocks)
    use_se = arch.get("use_se", True)
    block_label = "SE-ResBlock" if use_se else "ResBlock"
    # Pad with the defaults of the stages that are actually missing, not with
    # the defaults of stages 1..n.
    b1, b2, b3, b4 = (blocks + default_blocks[len(blocks):])[:4]
    return f"""```mermaid
graph LR
IN["Input
3 × 320 × 320"] --> STEM["Stem
7×7 Conv · BN · ReLU
3→64ch · MaxPool ÷2"]
STEM --> S1["Stage 1
{b1}× {block_label}
64ch"]
S1 --> S2["Stage 2 ↓½
{b2}× {block_label}
128ch"]
S2 --> S3["Stage 3 ↓½
{b3}× {block_label}
256ch"]
S3 --> S4["Stage 4 ↓½
{b4}× {block_label}
512ch"]
S4 --> GAP["Global Avg Pool
Dropout(0.5)
512-dim"]
GAP --> MLH["Multilabel Head
Linear 512→14
sigmoid · 14 pathologies"]
GAP --> BH["Binary Head
Linear 512→1
sigmoid · Normal/Abnormal"]
style MLH fill:#2e7d32,color:#fff
style BH fill:#1565c0,color:#fff
style IN fill:#37474f,color:#fff
```"""
def _render_densenet_architecture() -> str:
    """Return the fenced Mermaid graph describing CheXVisionDenseNet.

    The text is static; it does not depend on any configuration.
    """
    diagram_lines = (
        "```mermaid",
        "graph LR",
        'IN["Input',
        '3 × 320 × 320"] --> BB["DenseNet-121 Backbone',
        "ImageNet pretrained",
        "Dense connectivity",
        '7.9M parameters"]',
        'BB --> GAP2["Adaptive Avg Pool',
        '1024-dim features"]',
        'GAP2 --> FL["Feature Layer',
        "Linear 1024→512",
        'ReLU · Dropout(0.3)"]',
        'FL --> MLH["Multilabel Head',
        "Linear 512→14",
        'sigmoid · 14 pathologies"]',
        'FL --> BH["Binary Head',
        "Linear 512→1",
        'sigmoid · Normal/Abnormal"]',
        "style MLH fill:#2e7d32,color:#fff",
        "style BH fill:#1565c0,color:#fff",
        "style IN fill:#37474f,color:#fff",
        "style BB fill:#6a1b9a,color:#fff",
        "```",
    )
    return "\n".join(diagram_lines)
def _render_densenet_finetuning(config: dict[str, Any]) -> str:
    """Return the fenced Mermaid graph of the two DenseNet fine-tuning phases.

    Values come from ``config["model"]["fine_tuning"]`` (``freeze_epochs``,
    ``freeze_lr``, ``unfreeze_lr``; defaults 5, 1e-3, 1e-4) and
    ``config["training"]["epochs"]`` (default 60). Phase 2 is labelled as
    starting at ``freeze_epochs + 1``.
    """
    schedule = config.get("model", {}).get("fine_tuning", {})
    frozen_until = schedule.get("freeze_epochs", 5)
    last_epoch = config.get("training", {}).get("epochs", 60)
    lr_unfrozen = schedule.get("unfreeze_lr", 1e-4)
    lr_frozen = schedule.get("freeze_lr", 1e-3)
    first_unfrozen = frozen_until + 1

    diagram_lines = (
        "```mermaid",
        "graph LR",
        'P1["🔒 Phase 1',
        f"Epochs 1–{frozen_until}",
        "Backbone frozen",
        "Train heads only",
        f'lr = {lr_frozen}"] -->|"Epoch {first_unfrozen}',
        'unfreeze_backbone()"| P2["🔓 Phase 2',
        f"Epochs {first_unfrozen}–{last_epoch}",
        "End-to-end fine-tuning",
        "All layers trainable",
        f'lr = {lr_unfrozen}"]',
        "style P1 fill:#e65100,color:#fff",
        "style P2 fill:#6a1b9a,color:#fff",
        "```",
    )
    return "\n".join(diagram_lines)
def _render_per_class_auc_table(
    best_val_metrics: dict[str, Any],
    labels: list[str] | None = None,
) -> str:
    """Render a markdown table of per-class AUC-ROC from best epoch metrics.

    For every label the value stored under ``auc_<label>`` is looked up. A row
    is emitted only when that value is a finite number; missing, ``None``,
    non-numeric, NaN and infinite values are skipped so that one undefined
    class AUC cannot abort the whole table. The ten-cell bar is clamped to the
    0–10 range.

    Args:
        best_val_metrics: Mapping that may contain ``auc_<label>`` entries.
        labels: Labels to render, in order. Defaults to ``PATHOLOGY_LABELS``.

    Returns:
        The markdown table, or an empty string when no row could be rendered.
    """
    import math  # local import: only this helper needs it

    if labels is None:
        labels = PATHOLOGY_LABELS

    rows = []
    for label in labels:
        raw = best_val_metrics.get(f"auc_{label}")
        if raw is None:
            continue
        try:
            auc = float(raw)
        except (TypeError, ValueError):
            continue
        if not math.isfinite(auc):
            # int(round(nan)) would raise; an undefined AUC gets no row.
            continue
        bar_filled = min(10, max(0, int(round(auc * 10))))
        bar = "█" * bar_filled + "░" * (10 - bar_filled)
        rows.append(f"| {label:<20} | `{auc:.4f}` | `{bar}` |")
    if not rows:
        return ""
    table = "| Pathology | AUC-ROC | Visual |\n"
    table += "|----------------------|----------|---------------|\n"
    table += "\n".join(rows)
    return table
def render_model_card(
    repo_id: str,
    checkpoint: dict[str, Any],
    history: dict[str, Any] | None = None,
) -> str:
    """Build the model-card text (YAML front matter plus Markdown) for a model repo.

    Args:
        repo_id: Hub repository id; its last path component is the fallback
            model name and it is embedded in the card body and citation.
        checkpoint: Checkpoint dictionary. The keys ``config``, ``best_auc``,
            ``epoch`` and ``best_val_metrics`` are read; all are optional.
        history: Optional metric history; the best ``binary_auc_roc`` and
            ``binary_f1`` values are reported when available.

    Returns:
        The complete card as a single string.
    """
    config = checkpoint.get("config", {})
    data_cfg = config.get("data", {})
    model_cfg = config.get("model", {})
    train_cfg = config.get("training", {})

    model_name = model_cfg.get("name", repo_id.split("/")[-1])
    is_densenet = model_cfg.get("type", "") == "densenet"
    dataset_repo = data_cfg.get("hf_dataset_repo", HF_DATASET_REPO)
    dataset_revision = data_cfg.get("hf_dataset_revision", HF_DATASET_REVISION)

    # --- Metrics summary: one bullet per value that is actually available ---
    summary: list[str] = []
    macro_auc = checkpoint.get("best_auc")
    if isinstance(macro_auc, (int, float)):
        summary.append(f"- Best validation macro AUC-ROC: `{macro_auc:.4f}`")
    for caption, history_key in (
        ("binary AUC-ROC", "binary_auc_roc"),
        ("binary F1", "binary_f1"),
    ):
        best_value = _safe_metric(history, history_key)
        if isinstance(best_value, float):
            summary.append(f"- Best validation {caption}: `{best_value:.4f}`")
    best_epoch = checkpoint.get("epoch")
    if best_epoch is not None:
        summary.append(f"- Best checkpoint epoch: `{best_epoch}`")
    if summary:
        metrics_block = "\n".join(summary)
    else:
        metrics_block = "- Metrics will appear after the first successful training run."

    # --- Architecture diagram, plus the fine-tuning section for DenseNet only ---
    if is_densenet:
        arch_diagram = _render_densenet_architecture()
        finetuning_section = (
            "\n## Fine-Tuning Strategy\n" + _render_densenet_finetuning(config) + "\n"
        )
    else:
        arch_diagram = _render_scratch_architecture(config)
        finetuning_section = ""

    # --- Per-class AUC table (omitted when there is nothing to show) ---
    best_val_metrics: dict[str, Any] = checkpoint.get("best_val_metrics", {})
    table = _render_per_class_auc_table(best_val_metrics) if best_val_metrics else ""
    per_class_section = f"\n## Per-Class AUC-ROC at Best Epoch\n{table}\n" if table else ""

    arch_summary = _architecture_summary(config)
    pipeline_diagram = _render_pipeline_diagram()

    # --- Training details bullets ---
    batch_size = train_cfg.get("batch_size", 32)
    grad_accum = train_cfg.get("grad_accum_steps", 1)
    effective_batch = batch_size * grad_accum
    amp_state = "enabled" if train_cfg.get("use_amp", False) else "disabled"
    clahe_state = "enabled" if data_cfg.get("clahe", False) else "disabled"
    label_smoothing = train_cfg.get("label_smoothing", 0.0)
    epochs_configured = train_cfg.get("epochs", "?")
    patience = train_cfg.get("early_stopping_patience", 10)
    training_details = "\n".join(
        [
            f"- Batch size: `{batch_size}` × grad_accum `{grad_accum}` = **effective batch `{effective_batch}`**",
            f"- AMP (fp16): `{amp_state}`",
            f"- CLAHE preprocessing: `{clahe_state}`",
            f"- Label smoothing: `{label_smoothing}`",
            "- Optimizer: AdamW · Scheduler: CosineAnnealingLR",
            f"- Epochs configured: `{epochs_configured}` · Early stop patience: `{patience}`",
        ]
    )

    return f"""---
license: mit
language:
- en
library_name: pytorch
pipeline_tag: image-classification
tags:
- chexvision
- medical-imaging
- chest-xray
- radiology
- pytorch
- multi-label-classification
datasets:
- {dataset_repo}
---
# {model_name}
> **CheXVision** — Deep Learning & Big Data university project.
> 14-class chest X-ray pathology detection + binary normal/abnormal classification
> on the NIH Chest X-ray14 dataset (112,120 images).
## Architecture
{arch_diagram}
{finetuning_section}
## Training Pipeline
{pipeline_diagram}
## Training Metrics
{metrics_block}
{per_class_section}
## Training Configuration
- Repository: `{repo_id}`
- Dataset: [{dataset_repo}](https://huggingface.co/datasets/{dataset_repo}) · revision `{dataset_revision}`
- Architecture: {arch_summary}
- Platform: Kaggle GPU kernel (NVIDIA T4 / P100)
{training_details}
## Intended Use
This model is intended for research and educational work on automated chest X-ray pathology detection.
It outputs two predictions per image:
1. **Multi-label scores** — independent sigmoid probability for each of 14 NIH pathologies
2. **Binary score** — sigmoid probability of any abnormality (Normal vs. Abnormal)
## Limitations
- Not validated for clinical use. Predictions must not substitute professional medical judgment.
- Trained on NIH Chest X-ray14, which contains noisy radiologist annotations (patient-level labels, not lesion-level).
- Performance degrades on images from equipment, patient populations, or preprocessing pipelines
that differ from the NIH training distribution.
- Reported AUC metrics are on the validation split, not the held-out test set.
## CheXNet Benchmark Context
CheXNet (Rajpurkar et al., 2017) — the seminal paper establishing DenseNet-121 for chest X-ray
classification — reported **0.841 macro AUC-ROC** on a comparable split of this dataset.
CheXVision-DenseNet matches this benchmark. See the
[CheXVision demo](https://huggingface.co/spaces/arudaev/chexvision-demo) for live inference.
## Citation
```bibtex
@misc{{chexvision2026,
title={{CheXVision: Dual-Task Chest X-ray Classification with Custom CNN and DenseNet-121}},
author={{BIG D(ATA) Team}},
year={{2026}},
howpublished={{\\url{{https://huggingface.co/{repo_id}}}}}
}}
```
"""
def upload_model_artifacts(
    checkpoint_path: Path,
    repo_id: str,
    token: str,
    checkpoint: dict[str, Any] | None = None,
    history_path: Path | None = None,
) -> None:
    """Upload a checkpoint, metadata, and model card to the HF Hub.

    The checkpoint file, a generated ``README.md`` model card, a
    ``training_config.json`` dump of ``checkpoint["config"]`` and, when it
    exists, the history file are staged in a temporary directory and pushed
    with a single ``upload_folder`` call. The target repository is created if
    it does not exist yet.

    Args:
        checkpoint_path: Path of the checkpoint file to upload.
        repo_id: Target model repository on the Hub.
        token: HF token used for the runtime configuration and the API client.
        checkpoint: Checkpoint dictionary used to render the card and the
            config dump; treated as empty when omitted.
        history_path: Optional JSON history file; ignored when it does not exist.

    Raises:
        FileNotFoundError: If ``checkpoint_path`` does not exist. This is
            checked before any Hub call so that a bad path does not leave a
            freshly created, empty repository behind.
    """
    checkpoint_path = Path(checkpoint_path)
    if not checkpoint_path.exists():
        raise FileNotFoundError(f"Checkpoint file not found: {checkpoint_path}")

    history_path = Path(history_path) if history_path else None
    # Evaluate once so the card and the staged files agree on whether the
    # history is included.
    has_history = history_path is not None and history_path.exists()

    history: dict[str, Any] | None = None
    if has_history:
        history = json.loads(history_path.read_text(encoding="utf-8"))

    checkpoint = checkpoint or {}
    model_card = render_model_card(repo_id, checkpoint, history)
    training_config = json.dumps(checkpoint.get("config", {}), indent=2)

    # Environment must be configured before the HF client library is imported.
    configure_hf_runtime(token=token)
    from huggingface_hub import HfApi

    api = HfApi(token=token)
    api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = Path(tmp_dir)
        readme_path = tmp_path / "README.md"
        config_path = tmp_path / "training_config.json"
        staged_checkpoint = tmp_path / checkpoint_path.name

        readme_path.write_text(model_card, encoding="utf-8")
        config_path.write_text(training_config, encoding="utf-8")
        shutil.copy2(checkpoint_path, staged_checkpoint)
        if has_history:
            shutil.copy2(history_path, tmp_path / history_path.name)

        api.upload_folder(
            folder_path=str(tmp_path),
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Upload trained artifacts for {checkpoint_path.stem}",
        )