"""Helpers for Hugging Face Hub authentication, runtime config, and uploads."""

from __future__ import annotations

import json
import math
import os
import shutil
import socket
import tempfile
from pathlib import Path
from typing import Any

HF_DATASET_REPO = "arudaev/chest-xray-14-320"
# Default dataset revision; override with the CHEXVISION_DATASET_REVISION env var.
HF_DATASET_REVISION = os.environ.get(
    "CHEXVISION_DATASET_REVISION",
    "44443e6ee968b3c6094b63f14a27698c40b50680",
)

# NIH Chest X-ray14 pathology labels in canonical order
PATHOLOGY_LABELS = [
    "Atelectasis",
    "Cardiomegaly",
    "Effusion",
    "Infiltration",
    "Mass",
    "Nodule",
    "Pneumonia",
    "Pneumothorax",
    "Consolidation",
    "Edema",
    "Emphysema",
    "Fibrosis",
    "Pleural_Thickening",
    "Hernia",
]

# Environment variables consulted for a token, in priority order.
_HF_TOKEN_ENV_NAMES = ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN", "HUGGING_FACE_HUB_TOKEN")

# Kaggle mounts dataset_sources under two possible paths depending on the
# runtime version — check both so old and new kernels both work.
_KAGGLE_TOKEN_FILES = (
    Path("/kaggle/input/datasets/hlexnc/chexvision-secrets/hf_token.txt"),
    Path("/kaggle/input/chexvision-secrets/hf_token.txt"),
)

# Residual blocks per stage used when the config does not specify them.
_DEFAULT_BLOCK_CONFIG = [3, 4, 6, 3]


def _load_dotenv_if_available() -> None:
    """Call ``dotenv.load_dotenv()`` when python-dotenv is installed; otherwise do nothing."""
    try:
        from dotenv import load_dotenv
    except ImportError:
        return
    load_dotenv()


def _set_hf_token_env(token: str) -> str:
    """Write ``token`` to ``HF_TOKEN`` and ``HUGGING_FACE_HUB_TOKEN`` and return it."""
    os.environ["HF_TOKEN"] = token
    os.environ["HUGGING_FACE_HUB_TOKEN"] = token
    return token


def _read_kaggle_token_file() -> str:
    """Return the first non-empty token found in a mounted Kaggle dataset file, or ``""``."""
    for token_file in _KAGGLE_TOKEN_FILES:
        try:
            token = token_file.read_text(encoding="utf-8").strip()
        except (OSError, UnicodeDecodeError):
            # Missing, unreadable, or non-text file: try the next candidate
            # instead of aborting the whole token lookup.
            continue
        if token:
            print(f"[hub] Loaded HF_TOKEN from Kaggle dataset source: {token_file}")
            return token
    return ""


def _read_kaggle_secret() -> tuple[str, str | None]:
    """Query Kaggle's ``UserSecretsClient`` for ``HF_TOKEN``.

    Returns:
        ``(token, error)`` where ``token`` is ``""`` on failure and ``error`` is a
        ``"ExceptionType: message"`` description of that failure (``None`` on success).
    """
    try:
        from kaggle_secrets import UserSecretsClient

        token = UserSecretsClient().get_secret("HF_TOKEN").strip()
    except Exception as exc:  # deliberate: any failure here just means "no secret"
        return "", f"{type(exc).__name__}: {exc}"
    return token, None


def load_hf_token(required: bool = False) -> str | None:
    """Resolve an HF token from env vars, a .env file, a Kaggle dataset, or Kaggle secrets.

    Resolution order (first non-empty value wins):

    1. Environment variables ``HF_TOKEN``, ``HUGGINGFACEHUB_API_TOKEN``,
       ``HUGGING_FACE_HUB_TOKEN`` (after ``load_dotenv()`` has run, if available).
    2. ``hf_token.txt`` from the attached Kaggle dataset ``hlexnc/chexvision-secrets``,
       looked up under ``/kaggle/input/datasets/hlexnc/chexvision-secrets/`` and
       ``/kaggle/input/chexvision-secrets/``. This is the preferred automated path:
       add the dataset to dataset_sources in kernel-metadata.json.
    3. Kaggle ``UserSecretsClient`` — interactive sessions only, kept as a fallback.

    A resolved token is also written to ``HF_TOKEN`` and ``HUGGING_FACE_HUB_TOKEN``.

    Args:
        required: When true, raise instead of returning ``None`` if no token is found.

    Returns:
        The stripped token, or ``None`` when nothing was found and ``required`` is false.

    Raises:
        RuntimeError: No token was found and ``required`` is true.
    """
    _load_dotenv_if_available()

    # 1. Environment variables (highest priority; set by .env, GitHub Actions, etc.)
    for name in _HF_TOKEN_ENV_NAMES:
        token = os.environ.get(name, "").strip()
        if token:
            return _set_hf_token_env(token)

    # 2. Token file from an attached Kaggle dataset source.
    token = _read_kaggle_token_file()
    if token:
        return _set_hf_token_env(token)

    # 3. Kaggle UserSecretsClient — interactive sessions only (fallback).
    token, kaggle_secret_error = _read_kaggle_secret()
    if token:
        return _set_hf_token_env(token)

    if not required:
        return None

    if os.environ.get("KAGGLE_KERNEL_RUN_TYPE"):
        detail = f" Kaggle reported: {kaggle_secret_error}" if kaggle_secret_error else ""
        raise RuntimeError(
            "HF_TOKEN not found. Preferred fix: create a private Kaggle dataset "
            "'hlexnc/chexvision-secrets' with a file 'hf_token.txt' containing your "
            "HF token, then add it to dataset_sources in kernel-metadata.json. "
            f"Alternatively enable HF_TOKEN in Kaggle Secrets (interactive only).{detail}"
        )
    raise RuntimeError(
        "HF_TOKEN not found. Set it in .env, export it in the environment, "
        "or add it to Kaggle Secrets."
    )


def configure_hf_runtime(
    token: str | None = None,
    *,
    required_token: bool = False,
    check_dns: bool = False,
) -> str | None:
    """Set the HF runtime environment before importing HF client libraries.

    Args:
        token: Token to use; when falsy, :func:`load_hf_token` is consulted.
        required_token: Forwarded to :func:`load_hf_token` as ``required``.
        check_dns: When true, verify that ``huggingface.co`` resolves.

    Returns:
        The resolved token, or ``None`` if none is available.

    Raises:
        RuntimeError: The token is required but missing, or the DNS check failed.
    """
    resolved_token = token or load_hf_token(required=required_token)

    if os.environ.get("KAGGLE_KERNEL_RUN_TYPE") and "HF_HOME" not in os.environ:
        os.environ["HF_HOME"] = "/kaggle/working/hf_home"

    hf_home = os.environ.get("HF_HOME", "").strip()
    if hf_home:
        try:
            Path(hf_home).mkdir(parents=True, exist_ok=True)
        except OSError:
            pass  # Best-effort; the path may not be writable outside a real Kaggle kernel

    # setdefault: values already exported by the caller take precedence.
    os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
    os.environ.setdefault("HF_HUB_ETAG_TIMEOUT", "30")
    os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "300")
    os.environ.setdefault("HF_HUB_VERBOSITY", "info")

    if resolved_token:
        _set_hf_token_env(resolved_token)

    if check_dns:
        try:
            socket.getaddrinfo("huggingface.co", 443)
        except OSError as exc:
            raise RuntimeError(
                "Failed to resolve huggingface.co from the current runtime. "
                "Check Kaggle internet access or a platform-side DNS issue."
            ) from exc

    return resolved_token


def _safe_metric(history: dict[str, Any] | None, key: str) -> float | None:
    """Return the largest numeric value recorded under ``key`` in ``history``.

    Entries that are not real numbers (e.g. ``None`` placeholders) and NaN values
    are ignored, so a partially filled history does not raise.

    Returns:
        The maximum as a ``float``, or ``None`` when ``history`` is empty, the key
        is missing or not a list/tuple, or no usable numeric value exists.
    """
    if not history:
        return None
    values = history.get(key, [])
    if not isinstance(values, (list, tuple)):
        return None
    numeric = [
        float(value)
        for value in values
        if isinstance(value, (int, float)) and not math.isnan(value)
    ]
    return max(numeric) if numeric else None


def _architecture_summary(config: dict[str, Any]) -> str:
    """Produce a short human-readable architecture summary from ``config["model"]``."""
    model_cfg = config.get("model", {})
    if model_cfg.get("type", "") == "densenet":
        return (
            "DenseNet-121 transfer learning with a shared feature layer "
            "and dual classification heads."
        )
    arch = model_cfg.get("architecture", {})
    blocks = arch.get("block_config", [3, 4, 6, 3])
    se_note = " with Squeeze-Excitation channel attention" if arch.get("use_se", True) else ""
    return (
        f"Custom residual CNN{se_note} (depth {blocks}) trained from scratch "
        "with shared features and dual classification heads."
    )


def _render_pipeline_diagram() -> str:
    """Mermaid flowchart of the full data→train→upload pipeline."""
    return """```mermaid
flowchart TD
    DS[("🗄️ arudaev/chest-xray-14-320\n112,120 images · 36 shards · ~7.97 GB")]
    DS -->|snapshot_download| PREP["📂 data/images · data/labels.csv\ntrain 78,468 · val 11,210 · test 22,442"]
    PREP --> AUG["Augmentation Pipeline\nHFlip · Rotate±15° · RandomAffine\nColorJitter · GaussianBlur · RandomErasing"]
    AUG --> FWD["⚡ Model Forward Pass\ntorch.cuda.amp.autocast · fp16"]
    FWD --> ML["multilabel_logits B×14\nWeightedBCE + pos_weight · 14 classes"]
    FWD --> BIN["binary_logits B×1\nBCE · Normal vs. Abnormal"]
    ML --> LOSS["Combined Loss\n1.0 × multilabel + 0.5 × binary"]
    BIN --> LOSS
    LOSS --> BACK["Backward · Grad Clip 1.0\nGradient Accumulation ×4 · eff. batch 96"]
    BACK --> OPT["AdamW · CosineAnnealingLR\nearly stop patience = 15"]
    OPT -->|"↑ best val AUC-ROC"| BEST["💾 Best Checkpoint\nmodel_state · best_val_metrics · config"]
    BEST -->|upload_model_artifacts| HUB["🤗 HF Hub\ncheckpoint · history.json · model card"]
```"""


def _render_scratch_architecture(config: dict[str, Any]) -> str:
    """Mermaid architecture diagram for CheXVisionScratch.

    Reads ``block_config`` and ``use_se`` from ``config["model"]["architecture"]``.
    ``block_config`` may be any sequence (list or tuple); stages it does not cover
    fall back to the default depth for that same stage, and extra entries beyond
    the fourth are ignored.
    """
    arch = config.get("model", {}).get("architecture", {})
    # list(...) so tuple configs work; `or` so an explicit null uses the defaults.
    blocks = list(arch.get("block_config") or _DEFAULT_BLOCK_CONFIG)
    block_label = "SE-ResBlock" if arch.get("use_se", True) else "ResBlock"
    # Pad with the defaults of the *missing* stages, then keep exactly four.
    b1, b2, b3, b4 = (blocks + _DEFAULT_BLOCK_CONFIG[len(blocks):])[:4]
    return f"""```mermaid
graph LR
    IN["Input 3 × 320 × 320"] --> STEM["Stem 7×7 Conv · BN · ReLU 3→64ch · MaxPool ÷2"]
    STEM --> S1["Stage 1 {b1}× {block_label} 64ch"]
    S1 --> S2["Stage 2 ↓½ {b2}× {block_label} 128ch"]
    S2 --> S3["Stage 3 ↓½ {b3}× {block_label} 256ch"]
    S3 --> S4["Stage 4 ↓½ {b4}× {block_label} 512ch"]
    S4 --> GAP["Global Avg Pool Dropout(0.5) 512-dim"]
    GAP --> MLH["Multilabel Head Linear 512→14 sigmoid · 14 pathologies"]
    GAP --> BH["Binary Head Linear 512→1 sigmoid · Normal/Abnormal"]
    style MLH fill:#2e7d32,color:#fff
    style BH fill:#1565c0,color:#fff
    style IN fill:#37474f,color:#fff
```"""


def _render_densenet_architecture() -> str:
    """Mermaid architecture diagram for CheXVisionDenseNet."""
    return """```mermaid
graph LR
    IN["Input 3 × 320 × 320"] --> BB["DenseNet-121 Backbone ImageNet pretrained Dense connectivity 7.9M parameters"]
    BB --> GAP2["Adaptive Avg Pool 1024-dim features"]
    GAP2 --> FL["Feature Layer Linear 1024→512 ReLU · Dropout(0.3)"]
    FL --> MLH["Multilabel Head Linear 512→14 sigmoid · 14 pathologies"]
    FL --> BH["Binary Head Linear 512→1 sigmoid · Normal/Abnormal"]
    style MLH fill:#2e7d32,color:#fff
    style BH fill:#1565c0,color:#fff
    style IN fill:#37474f,color:#fff
    style BB fill:#6a1b9a,color:#fff
```"""


def _render_densenet_finetuning(config: dict[str, Any]) -> str:
    """Mermaid fine-tuning phase diagram for DenseNet.

    Uses ``config["model"]["fine_tuning"]`` (``freeze_epochs``, ``freeze_lr``,
    ``unfreeze_lr``) and ``config["training"]["epochs"]``, each with a default.
    """
    ft = config.get("model", {}).get("fine_tuning", {})
    freeze_epochs = ft.get("freeze_epochs", 5)
    total_epochs = config.get("training", {}).get("epochs", 60)
    unfreeze_lr = ft.get("unfreeze_lr", 1e-4)
    freeze_lr = ft.get("freeze_lr", 1e-3)
    return f"""```mermaid
graph LR
    P1["🔒 Phase 1 Epochs 1–{freeze_epochs} Backbone frozen Train heads only lr = {freeze_lr}"] -->|"Epoch {freeze_epochs + 1} unfreeze_backbone()"| P2["🔓 Phase 2 Epochs {freeze_epochs + 1}–{total_epochs} End-to-end fine-tuning All layers trainable lr = {unfreeze_lr}"]
    style P1 fill:#e65100,color:#fff
    style P2 fill:#6a1b9a,color:#fff
```"""


def _render_per_class_auc_table(best_val_metrics: dict[str, Any]) -> str:
    """Render a markdown table of per-class AUC-ROC from best epoch metrics.

    Looks up ``auc_<label>`` for every entry of ``PATHOLOGY_LABELS``. Labels whose
    value is missing, non-numeric, NaN, or infinite are left out of the table.

    Returns:
        The table, or ``""`` when no label has a usable value.
    """
    rows = []
    for label in PATHOLOGY_LABELS:
        raw = best_val_metrics.get(f"auc_{label}")
        if raw is None:
            continue
        try:
            auc = float(raw)
        except (TypeError, ValueError):
            continue
        if not math.isfinite(auc):
            continue  # round() would raise on NaN/inf
        # Clamp so out-of-range values still draw a 10-cell bar.
        filled = min(10, max(0, round(auc * 10)))
        bar = "█" * filled + "░" * (10 - filled)
        rows.append(f"| {label:<20} | `{auc:.4f}` | `{bar}` |")

    if not rows:
        return ""
    header = (
        "| Pathology | AUC-ROC | Visual |\n"
        "|----------------------|----------|---------------|\n"
    )
    return header + "\n".join(rows)


def render_model_card(
    repo_id: str,
    checkpoint: dict[str, Any],
    history: dict[str, Any] | None = None,
) -> str:
    """Render a Hugging Face model card with architecture diagrams and training metrics.

    Args:
        repo_id: Hub repository id; its last path component is the fallback model name.
        checkpoint: Mapping that may carry ``config``, ``best_auc``, ``epoch`` and
            ``best_val_metrics``. Missing or ``None`` entries fall back to defaults.
        history: Optional per-metric value lists; ``binary_auc_roc`` and
            ``binary_f1`` are summarised by their best value.

    Returns:
        The full README markdown, including YAML front matter.
    """
    # `or {}` so explicit nulls in a checkpoint behave like missing keys.
    config = checkpoint.get("config") or {}
    data_cfg = config.get("data") or {}
    model_cfg = config.get("model") or {}
    train_cfg = config.get("training") or {}

    model_name = model_cfg.get("name", repo_id.split("/")[-1])
    is_densenet = model_cfg.get("type", "") == "densenet"
    dataset_repo = data_cfg.get("hf_dataset_repo", HF_DATASET_REPO)
    dataset_revision = data_cfg.get("hf_dataset_revision", HF_DATASET_REVISION)
    best_auc = checkpoint.get("best_auc")
    epoch = checkpoint.get("epoch")
    best_val_metrics: dict[str, Any] = checkpoint.get("best_val_metrics") or {}
    best_binary_auc = _safe_metric(history, "binary_auc_roc")
    best_binary_f1 = _safe_metric(history, "binary_f1")

    # --- Metrics summary ---
    metrics_lines = []
    if isinstance(best_auc, (int, float)):
        metrics_lines.append(f"- Best validation macro AUC-ROC: `{best_auc:.4f}`")
    if isinstance(best_binary_auc, float):
        metrics_lines.append(f"- Best validation binary AUC-ROC: `{best_binary_auc:.4f}`")
    if isinstance(best_binary_f1, float):
        metrics_lines.append(f"- Best validation binary F1: `{best_binary_f1:.4f}`")
    if epoch is not None:
        metrics_lines.append(f"- Best checkpoint epoch: `{epoch}`")
    metrics_block = (
        "\n".join(metrics_lines)
        if metrics_lines
        else "- Metrics will appear after the first successful training run."
    )

    # --- Architecture diagram ---
    if is_densenet:
        arch_diagram = _render_densenet_architecture()
    else:
        arch_diagram = _render_scratch_architecture(config)

    # --- Fine-tuning diagram (DenseNet only) ---
    finetuning_section = ""
    if is_densenet:
        finetuning_section = f"""
## Fine-Tuning Strategy

{_render_densenet_finetuning(config)}
"""

    # --- Per-class AUC table ---
    per_class_section = ""
    if best_val_metrics:
        table = _render_per_class_auc_table(best_val_metrics)
        if table:
            per_class_section = f"""
## Per-Class AUC-ROC at Best Epoch

{table}
"""

    # --- Architecture summary line ---
    arch_summary = _architecture_summary(config)

    # --- AMP / training details ---
    use_amp = train_cfg.get("use_amp", False)
    use_clahe = data_cfg.get("clahe", False)
    label_smoothing = train_cfg.get("label_smoothing", 0.0)
    grad_accum = train_cfg.get("grad_accum_steps", 1)
    batch_size = train_cfg.get("batch_size", 32)
    effective_batch = batch_size * grad_accum
    training_details = (
        f"- Batch size: `{batch_size}` "
        f"× grad_accum `{grad_accum}` = **effective batch `{effective_batch}`**\n"
        f"- AMP (fp16): `{'enabled' if use_amp else 'disabled'}`\n"
        f"- CLAHE preprocessing: `{'enabled' if use_clahe else 'disabled'}`\n"
        f"- Label smoothing: `{label_smoothing}`\n"
        f"- Optimizer: AdamW · Scheduler: CosineAnnealingLR\n"
        f"- Epochs configured: `{train_cfg.get('epochs', '?')}` · "
        f"Early stop patience: `{train_cfg.get('early_stopping_patience', 10)}`"
    )

    return f"""---
license: mit
language:
- en
library_name: pytorch
pipeline_tag: image-classification
tags:
- chexvision
- medical-imaging
- chest-xray
- radiology
- pytorch
- multi-label-classification
datasets:
- {dataset_repo}
---

# {model_name}

> **CheXVision** — Deep Learning & Big Data university project.
> 14-class chest X-ray pathology detection + binary normal/abnormal classification
> on the NIH Chest X-ray14 dataset (112,120 images).

## Architecture

{arch_diagram}
{finetuning_section}
## Training Pipeline

{_render_pipeline_diagram()}

## Training Metrics

{metrics_block}
{per_class_section}
## Training Configuration

- Repository: `{repo_id}`
- Dataset: [{dataset_repo}](https://huggingface.co/datasets/{dataset_repo}) · revision `{dataset_revision}`
- Architecture: {arch_summary}
- Platform: Kaggle GPU kernel (NVIDIA T4 / P100)
{training_details}

## Intended Use

This model is intended for research and educational work on automated chest X-ray
pathology detection. It outputs two predictions per image:

1. **Multi-label scores** — independent sigmoid probability for each of 14 NIH pathologies
2. **Binary score** — sigmoid probability of any abnormality (Normal vs. Abnormal)

## Limitations

- Not validated for clinical use. Predictions must not substitute professional medical judgment.
- Trained on NIH Chest X-ray14, which contains noisy radiologist annotations (patient-level labels, not lesion-level).
- Performance degrades on images from equipment, patient populations, or preprocessing pipelines that differ from the NIH training distribution.
- Reported AUC metrics are on the validation split, not the held-out test set.

## CheXNet Benchmark Context

CheXNet (Rajpurkar et al., 2017) — the seminal paper establishing DenseNet-121 for chest X-ray
classification — reported **0.841 macro AUC-ROC** on a comparable split of this dataset.
CheXVision-DenseNet matches this benchmark. See the
[CheXVision demo](https://huggingface.co/spaces/arudaev/chexvision-demo) for live inference.

## Citation

```bibtex
@misc{{chexvision2026,
  title={{CheXVision: Dual-Task Chest X-ray Classification with Custom CNN and DenseNet-121}},
  author={{BIG D(ATA) Team}},
  year={{2026}},
  howpublished={{\\url{{https://huggingface.co/{repo_id}}}}}
}}
```
"""


def upload_model_artifacts(
    checkpoint_path: Path,
    repo_id: str,
    token: str,
    checkpoint: dict[str, Any] | None = None,
    history_path: Path | None = None,
) -> None:
    """Upload a checkpoint, metadata, and model card to the HF Hub.

    Stages ``README.md`` (the rendered model card), ``training_config.json``
    (``checkpoint["config"]``), the checkpoint file and — when it exists — the
    history file in a temporary directory, then uploads that folder in one commit.

    Args:
        checkpoint_path: Local checkpoint file to upload.
        repo_id: Target model repository; created if it does not exist.
        token: HF token used for the API client and exported to the environment.
        checkpoint: Loaded checkpoint mapping used for the card and config dump.
        history_path: Optional JSON file with per-metric histories.

    Raises:
        FileNotFoundError: ``checkpoint_path`` is not an existing file. Checked
            up front so that no remote repository is created for a failed upload.
    """
    checkpoint_path = Path(checkpoint_path)
    if not checkpoint_path.is_file():
        raise FileNotFoundError(f"Checkpoint file not found: {checkpoint_path}")

    history_path = Path(history_path) if history_path else None
    has_history = history_path is not None and history_path.exists()

    history: dict[str, Any] | None = None
    if has_history:
        history = json.loads(history_path.read_text(encoding="utf-8"))

    checkpoint = checkpoint or {}
    model_card = render_model_card(repo_id, checkpoint, history)
    training_config = json.dumps(checkpoint.get("config", {}), indent=2)

    # Environment must be configured before the HF client library is imported.
    configure_hf_runtime(token=token)
    from huggingface_hub import HfApi

    api = HfApi(token=token)
    api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = Path(tmp_dir)
        (tmp_path / "README.md").write_text(model_card, encoding="utf-8")
        (tmp_path / "training_config.json").write_text(training_config, encoding="utf-8")
        shutil.copy2(checkpoint_path, tmp_path / checkpoint_path.name)
        if has_history:
            shutil.copy2(history_path, tmp_path / history_path.name)

        api.upload_folder(
            folder_path=str(tmp_path),
            repo_id=repo_id,
            repo_type="model",
            commit_message=f"Upload trained artifacts for {checkpoint_path.stem}",
        )