Spaces:

Klinapps
/

cephalometric-landmark-detection

Runtime error

App Files Files Community

Klinapps commited on Jan 30

Commit

03d222a

verified ·

1 Parent(s): 809fb82

Upload 3 files

Browse files

Files changed (3) hide show

app.py +449 -0
hrnet.py +395 -0
requirements.txt +14 -0

app.py ADDED Viewed

	@@ -0,0 +1,449 @@

+"""
+Cephalometric Landmark Detection API
+HRNet-W32 based automatic landmark detection for lateral cephalometric radiographs
+Space para integración con Klinafy
+"""
+import os
+import json
+import numpy as np
+from PIL import Image
+import torch
+import torch.nn.functional as F
+from huggingface_hub import hf_hub_download
+import gradio as gr
+from hrnet import get_hrnet_w32
+# ============================================================================
+# CONFIGURACIÓN
+# ============================================================================
+# Hugging Face Hub repository and file name of the pretrained checkpoint
+MODEL_REPO = "cwlachap/hrnet-cephalometric-landmark-detection"
+MODEL_FILE = "best_model.pth"
+# Side length (pixels) the input image is resized to before inference
+INPUT_SIZE = 768
+# NOTE(review): HEATMAP_SIZE is not referenced in the visible code; the
+# post-processing reads the heatmap size from the array shape instead.
+HEATMAP_SIZE = 192
+NUM_LANDMARKS = 19
+# Names of the 19 landmarks, in the order of the model's output channels
+LANDMARK_NAMES = [
+    "S",      # 0 - Sella turcica
+    "N",      # 1 - Nasion
+    "Or",     # 2 - Orbitale
+    "Po",     # 3 - Porion
+    "Ba",     # 4 - Basion
+    "Pt",     # 5 - Pterygoid point
+    "ANS",    # 6 - Anterior Nasal Spine
+    "PNS",    # 7 - Posterior Nasal Spine
+    "A",      # 8 - Point A (Subspinale)
+    "U1T",    # 9 - Upper Incisor Tip
+    "U1R",    # 10 - Upper Incisor Root
+    "L1T",    # 11 - Lower Incisor Tip
+    "L1R",    # 12 - Lower Incisor Root
+    "B",      # 13 - Point B (Supramentale)
+    "Pog",    # 14 - Pogonion
+    "Gn",     # 15 - Gnathion
+    "Me",     # 16 - Menton
+    "Go",     # 17 - Gonion
+    "Ar"      # 18 - Articulare
+]
+# Display colours (RGB), one per anatomical group
+LANDMARK_COLORS = {
+    "cranial": (255, 0, 0),      # Red - S, N, Ba, Ar
+    "orbital": (0, 255, 0),       # Green - Or, Po
+    "maxilar": (0, 0, 255),       # Blue - ANS, PNS, A, Pt
+    "dental": (255, 255, 0),      # Yellow - U1T, U1R, L1T, L1R
+    "mandibular": (255, 0, 255)   # Magenta - B, Pog, Gn, Me, Go
+}
+# Maps each landmark name to its group key in LANDMARK_COLORS
+LANDMARK_GROUPS = {
+    "S": "cranial", "N": "cranial", "Ba": "cranial", "Ar": "cranial",
+    "Or": "orbital", "Po": "orbital",
+    "ANS": "maxilar", "PNS": "maxilar", "A": "maxilar", "Pt": "maxilar",
+    "U1T": "dental", "U1R": "dental", "L1T": "dental", "L1R": "dental",
+    "B": "mandibular", "Pog": "mandibular", "Gn": "mandibular",
+    "Me": "mandibular", "Go": "mandibular"
+}
+# ============================================================================
+# MODELO
+# ============================================================================
+# Module-level cache for the network; stays None until load_model() fills it
+model = None
+# Run on the GPU when CUDA is available, otherwise on the CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+def load_model():
+    """Carga el modelo HRNet desde Hugging Face Hub"""
+    global model
+    if model is not None:
+        return model
+    print(f"Cargando modelo en {device}...")
+    # Descargar pesos
+    model_path = hf_hub_download(
+        repo_id=MODEL_REPO,
+        filename=MODEL_FILE
+    )
+    # Crear modelo
+    model = get_hrnet_w32(num_landmarks=NUM_LANDMARKS)
+    # Cargar pesos
+    checkpoint = torch.load(model_path, map_location=device, weights_only=False)
+    # Manejar diferentes formatos de checkpoint
+    if 'model_state_dict' in checkpoint:
+        state_dict = checkpoint['model_state_dict']
+    elif 'state_dict' in checkpoint:
+        state_dict = checkpoint['state_dict']
+    else:
+        state_dict = checkpoint
+    # Limpiar prefijos si existen
+    new_state_dict = {}
+    for k, v in state_dict.items():
+        name = k.replace('module.', '')  # Remover prefijo de DataParallel
+        new_state_dict[name] = v
+    model.load_state_dict(new_state_dict, strict=False)
+    model.to(device)
+    model.eval()
+    print("Modelo cargado exitosamente!")
+    return model
+# ============================================================================
+# PREPROCESAMIENTO
+# ============================================================================
+def preprocess_image(image):
+    """
+    Preprocess an image for the model
+    Args:
+        image: PIL Image or numpy array
+    Returns:
+        tensor: normalised tensor [1, 3, 768, 768]
+        original_size: original (width, height)
+    """
+    # Convert to PIL if needed
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    # Remember the original size so predictions can be scaled back later
+    original_size = image.size  # (width, height)
+    # Convert to RGB if needed
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    # Resize to INPUT_SIZE x INPUT_SIZE (the aspect ratio is not preserved)
+    image = image.resize((INPUT_SIZE, INPUT_SIZE), Image.Resampling.BILINEAR)
+    # Convert to a float array and divide by 255
+    img_array = np.array(image).astype(np.float32) / 255.0
+    # Normalise with the ImageNet stats
+    mean = np.array([0.485, 0.456, 0.406])
+    std = np.array([0.229, 0.224, 0.225])
+    img_array = (img_array - mean) / std
+    # Switch to CHW layout and add the batch dimension
+    img_tensor = torch.from_numpy(img_array.transpose(2, 0, 1)).float()
+    img_tensor = img_tensor.unsqueeze(0)
+    return img_tensor, original_size
+# ============================================================================
+# POSTPROCESAMIENTO
+# ============================================================================
+def get_max_preds(heatmaps):
+    """
+    Find the coordinates of the maximum of each heatmap
+    Args:
+        heatmaps: numpy array [batch, num_landmarks, H, W]
+    Returns:
+        preds: (x, y) coordinates in heatmap pixels [batch, num_landmarks, 2]
+        maxvals: peak values used as confidence [batch, num_landmarks, 1]
+    """
+    batch_size = heatmaps.shape[0]
+    num_joints = heatmaps.shape[1]
+    width = heatmaps.shape[3]
+    # Flatten each heatmap so a single argmax yields a row-major index
+    heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
+    idx = np.argmax(heatmaps_reshaped, axis=2)
+    maxvals = np.amax(heatmaps_reshaped, axis=2)
+    maxvals = maxvals.reshape((batch_size, num_joints, 1))
+    idx = idx.reshape((batch_size, num_joints, 1))
+    # Duplicate the flat index into two slots, then decode it into x and y
+    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
+    preds[:, :, 0] = (preds[:, :, 0]) % width  # x = column
+    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)  # y = row
+    return preds, maxvals
+def heatmaps_to_landmarks(heatmaps, original_size):
+    """
+    Convert heatmaps to landmark coordinates
+    Args:
+        heatmaps: tensor [1, 19, H, W]
+        original_size: (width, height) of the original image
+    Returns:
+        landmarks: list of dicts with name, x, y, confidence, group
+    """
+    heatmaps_np = heatmaps.cpu().numpy()
+    # Get the coordinates of each heatmap's maximum
+    preds, maxvals = get_max_preds(heatmaps_np)
+    # Scale from heatmap pixels to the original image size
+    orig_w, orig_h = original_size
+    heatmap_h, heatmap_w = heatmaps_np.shape[2], heatmaps_np.shape[3]
+    scale_x = orig_w / heatmap_w
+    scale_y = orig_h / heatmap_h
+    landmarks = []
+    # Only the first batch element is read
+    for i in range(NUM_LANDMARKS):
+        x = float(preds[0, i, 0] * scale_x)
+        y = float(preds[0, i, 1] * scale_y)
+        conf = float(maxvals[0, i, 0])
+        landmarks.append({
+            "name": LANDMARK_NAMES[i],
+            "x": round(x, 2),
+            "y": round(y, 2),
+            "confidence": round(conf, 4),
+            "group": LANDMARK_GROUPS[LANDMARK_NAMES[i]]
+        })
+    return landmarks
+# ============================================================================
+# INFERENCIA
+# ============================================================================
+def detect_landmarks(image):
+    """
+    Detect cephalometric landmarks in an image
+    Args:
+        image: PIL Image or numpy array
+    Returns:
+        landmarks: list of dicts with name, x, y, confidence, group
+        annotated_image: image with the landmarks drawn on it
+    """
+    # Load the model if it is not loaded yet
+    model = load_model()
+    # Preprocess
+    img_tensor, original_size = preprocess_image(image)
+    img_tensor = img_tensor.to(device)
+    # Inference (no gradients needed)
+    with torch.no_grad():
+        heatmaps = model(img_tensor)
+    # Postprocess
+    landmarks = heatmaps_to_landmarks(heatmaps, original_size)
+    # Build the annotated image from the original (un-resized) input
+    annotated = draw_landmarks(image, landmarks)
+    return landmarks, annotated
+def draw_landmarks(image, landmarks, radius=5):
+    """
+    Dibuja los landmarks en la imagen
+    Args:
+        image: PIL Image o numpy array
+        landmarks: lista de dicts con coordenadas
+        radius: radio del círculo
+    Returns:
+        PIL Image con landmarks dibujados
+    """
+    from PIL import ImageDraw, ImageFont
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    # Crear copia para dibujar
+    img_draw = image.copy()
+    if img_draw.mode != 'RGB':
+        img_draw = img_draw.convert('RGB')
+    draw = ImageDraw.Draw(img_draw)
+    # Intentar cargar fuente, usar default si falla
+    try:
+        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 12)
+    except:
+        font = ImageFont.load_default()
+    for lm in landmarks:
+        x, y = lm['x'], lm['y']
+        color = LANDMARK_COLORS[lm['group']]
+        # Dibujar círculo
+        draw.ellipse([x-radius, y-radius, x+radius, y+radius],
+                     fill=color, outline=(255, 255, 255))
+        # Dibujar nombre
+        draw.text((x+radius+2, y-radius), lm['name'],
+                  fill=(255, 255, 255), font=font,
+                  stroke_width=1, stroke_fill=(0, 0, 0))
+    return img_draw
+# ============================================================================
+# INTERFAZ GRADIO
+# ============================================================================
+def process_image(image):
+    """Main Gradio handler: returns (annotated image, JSON string)"""
+    # With no upload, return no image and a plain (non-JSON) prompt message
+    if image is None:
+        return None, "Por favor sube una imagen cefalométrica"
+    try:
+        landmarks, annotated = detect_landmarks(image)
+        # Format the result as JSON for display
+        json_output = json.dumps({
+            "success": True,
+            "num_landmarks": len(landmarks),
+            "landmarks": landmarks
+        }, indent=2)
+        return annotated, json_output
+    # UI boundary: any failure is reported in the JSON panel instead of raised
+    except Exception as e:
+        return None, json.dumps({
+            "success": False,
+            "error": str(e)
+        }, indent=2)
+def api_predict(image):
+    """API handler for the Klinafy integration: returns a result dict"""
+    if image is None:
+        return {"success": False, "error": "No image provided"}
+    try:
+        # The annotated image is discarded; only the coordinates are returned
+        landmarks, _ = detect_landmarks(image)
+        return {
+            "success": True,
+            "model": "HRNet-W32",
+            "num_landmarks": len(landmarks),
+            "landmarks": landmarks
+        }
+    # API boundary: failures are returned as a payload instead of raised
+    except Exception as e:
+        return {
+            "success": False,
+            "error": str(e)
+        }
+# ============================================================================
+# CREAR APP
+# ============================================================================
+# Load the model at import time so the first request does not pay for it
+print("Inicializando modelo...")
+load_model()
+# Build the Gradio interface
+with gr.Blocks(title="Cephalometric Landmark Detection") as demo:
+    gr.Markdown("""
+    # 🦷 Detección de Landmarks Cefalométricos
+    Detección automática de **19 puntos cefalométricos** usando HRNet-W32.
+    ### Landmarks detectados:
+    - **Craneales** (rojo): S, N, Ba, Ar
+    - **Orbitales** (verde): Or, Po
+    - **Maxilares** (azul): ANS, PNS, A, Pt
+    - **Dentales** (amarillo): U1T, U1R, L1T, L1R
+    - **Mandibulares** (magenta): B, Pog, Gn, Me, Go
+    ---
+    """)
+    with gr.Row():
+        # Left column: upload and trigger button
+        with gr.Column():
+            input_image = gr.Image(
+                label="Radiografía Cefalométrica Lateral",
+                type="pil"
+            )
+            detect_btn = gr.Button("🔍 Detectar Landmarks", variant="primary")
+        # Right column: annotated image and JSON coordinates
+        with gr.Column():
+            output_image = gr.Image(
+                label="Imagen con Landmarks"
+            )
+            output_json = gr.Code(
+                label="Coordenadas (JSON)",
+                language="json"
+            )
+    # NOTE(review): only process_image is attached to an event here;
+    # api_predict is not referenced anywhere in the visible code, so the
+    # endpoint and response shape described below may not match - confirm.
+    detect_btn.click(
+        fn=process_image,
+        inputs=[input_image],
+        outputs=[output_image, output_json]
+    )
+    gr.Markdown("""
+    ---
+    ### 📡 API Endpoint
+    Para integración programática (ej. Klinafy):
+    ```javascript
+    const response = await fetch('https://YOUR-SPACE.hf.space/api/predict', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ data: [base64Image] })
+    });
+    const result = await response.json();
+    // result.data[0] = { success: true, landmarks: [...] }
+    ```
+    ---
+    **Modelo**: HRNet-W32 | **Precisión**: MRE ~1.5mm | **Licencia**: MIT
+    """)
+# Enable Gradio's request queue (the original note says this enables the API)
+demo.queue()
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )

hrnet.py ADDED Viewed

	@@ -0,0 +1,395 @@

+"""
+HRNet-W32 Architecture for Cephalometric Landmark Detection
+Based on: Deep High-Resolution Representation Learning for Visual Recognition
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+def conv3x3(in_planes, out_planes, stride=1):
+    """3x3 convolution with padding"""
+    # padding=1 keeps the spatial size at stride 1; no bias term is used
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=1, bias=False)
+class BasicBlock(nn.Module):
+    # Residual block: two 3x3 conv + BatchNorm layers plus a skip connection.
+    # `expansion` is the ratio of the block's output channels to `planes`.
+    expansion = 1
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        # inplanes: input channels; planes: channels produced by both convs.
+        # stride: applied by the first conv only.
+        # downsample: optional module applied to the input on the skip path.
+        super(BasicBlock, self).__init__()
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.downsample = downsample
+        self.stride = stride
+    def forward(self, x):
+        # Keep the input for the skip connection
+        residual = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        # Transform the skip path when a downsample module was supplied
+        if self.downsample is not None:
+            residual = self.downsample(x)
+        out += residual
+        out = self.relu(out)
+        return out
+class Bottleneck(nn.Module):
+    # Residual block of 1x1 -> 3x3 -> 1x1 convolutions; the last conv widens
+    # the output to planes * expansion channels.
+    expansion = 4
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        # inplanes: input channels; planes: width of the inner convs.
+        # stride: applied by the 3x3 conv only.
+        # downsample: optional module applied to the input on the skip path.
+        super(Bottleneck, self).__init__()
+        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
+                               padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
+                               bias=False)
+        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+    def forward(self, x):
+        # Keep the input for the skip connection
+        residual = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+        out = self.conv3(out)
+        out = self.bn3(out)
+        # Transform the skip path when a downsample module was supplied
+        if self.downsample is not None:
+            residual = self.downsample(x)
+        out += residual
+        out = self.relu(out)
+        return out
+class HighResolutionModule(nn.Module):
+    # One multi-branch module: every branch runs its own stack of residual
+    # blocks, then the branch outputs are fused by summation in forward().
+    def __init__(self, num_branches, blocks, num_blocks, num_inchannels,
+                 num_channels, fuse_method, multi_scale_output=True):
+        # num_branches: number of parallel branches.
+        # blocks: residual block class used in every branch.
+        # num_blocks / num_inchannels / num_channels: per-branch lists.
+        # fuse_method: stored, but not read anywhere in the visible code.
+        # multi_scale_output: when False only the first branch gets a fused
+        # output.
+        super(HighResolutionModule, self).__init__()
+        self._check_branches(num_branches, num_blocks, num_inchannels, num_channels)
+        self.num_inchannels = num_inchannels
+        self.fuse_method = fuse_method
+        self.num_branches = num_branches
+        self.multi_scale_output = multi_scale_output
+        self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
+        self.fuse_layers = self._make_fuse_layers()
+        self.relu = nn.ReLU(inplace=True)
+    def _check_branches(self, num_branches, num_blocks, num_inchannels, num_channels):
+        # Raise ValueError unless every per-branch list has num_branches items
+        if num_branches != len(num_blocks):
+            raise ValueError('NUM_BRANCHES != len(NUM_BLOCKS)')
+        if num_branches != len(num_channels):
+            raise ValueError('NUM_BRANCHES != len(NUM_CHANNELS)')
+        if num_branches != len(num_inchannels):
+            raise ValueError('NUM_BRANCHES != len(NUM_INCHANNELS)')
+    def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
+        # Build the block stack for one branch. A 1x1 conv + BN projection is
+        # put on the first block's skip path when stride or channels differ.
+        downsample = None
+        if stride != 1 or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.num_inchannels[branch_index],
+                          num_channels[branch_index] * block.expansion,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(num_channels[branch_index] * block.expansion),
+            )
+        layers = []
+        layers.append(block(self.num_inchannels[branch_index],
+                            num_channels[branch_index], stride, downsample))
+        # Record the branch's new width; this writes into the list that was
+        # passed to __init__, which get_num_inchannels() later returns.
+        self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
+        for i in range(1, num_blocks[branch_index]):
+            layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))
+        return nn.Sequential(*layers)
+    def _make_branches(self, num_branches, block, num_blocks, num_channels):
+        # One block stack per branch
+        branches = []
+        for i in range(num_branches):
+            branches.append(self._make_one_branch(i, block, num_blocks, num_channels))
+        return nn.ModuleList(branches)
+    def _make_fuse_layers(self):
+        # fuse_layers[i][j] maps branch j's output to branch i's channel count
+        # (and, for j < i, to its resolution); None when there is one branch.
+        if self.num_branches == 1:
+            return None
+        num_branches = self.num_branches
+        num_inchannels = self.num_inchannels
+        fuse_layers = []
+        for i in range(num_branches if self.multi_scale_output else 1):
+            fuse_layer = []
+            for j in range(num_branches):
+                if j > i:
+                    # Later branch: 1x1 conv + BN to match channels; the
+                    # spatial resize happens in forward() via F.interpolate.
+                    fuse_layer.append(nn.Sequential(
+                        nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
+                        nn.BatchNorm2d(num_inchannels[i])))
+                elif j == i:
+                    # Same branch: used as-is
+                    fuse_layer.append(None)
+                else:
+                    # Earlier branch: chain of (i - j) stride-2 3x3 convs; only
+                    # the last one changes the channel count and has no ReLU.
+                    conv3x3s = []
+                    for k in range(i - j):
+                        if k == i - j - 1:
+                            num_outchannels_conv3x3 = num_inchannels[i]
+                            conv3x3s.append(nn.Sequential(
+                                nn.Conv2d(num_inchannels[j], num_outchannels_conv3x3, 3, 2, 1, bias=False),
+                                nn.BatchNorm2d(num_outchannels_conv3x3)))
+                        else:
+                            num_outchannels_conv3x3 = num_inchannels[j]
+                            conv3x3s.append(nn.Sequential(
+                                nn.Conv2d(num_inchannels[j], num_outchannels_conv3x3, 3, 2, 1, bias=False),
+                                nn.BatchNorm2d(num_outchannels_conv3x3),
+                                nn.ReLU(inplace=True)))
+                    fuse_layer.append(nn.Sequential(*conv3x3s))
+            fuse_layers.append(nn.ModuleList(fuse_layer))
+        return nn.ModuleList(fuse_layers)
+    def get_num_inchannels(self):
+        # Per-branch channel counts after this module's branches
+        return self.num_inchannels
+    def forward(self, x):
+        # x: list with one tensor per branch; returns a list of fused tensors.
+        # The input list is overwritten in place with the branch outputs.
+        if self.num_branches == 1:
+            return [self.branches[0](x[0])]
+        for i in range(self.num_branches):
+            x[i] = self.branches[i](x[i])
+        x_fuse = []
+        for i in range(len(self.fuse_layers)):
+            # Start from branch 0's contribution, then add the other branches
+            y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
+            for j in range(1, self.num_branches):
+                if i == j:
+                    y = y + x[j]
+                elif j > i:
+                    # Resize branch j's projected output to branch i's size
+                    width_output = x[i].shape[-1]
+                    height_output = x[i].shape[-2]
+                    y = y + F.interpolate(
+                        self.fuse_layers[i][j](x[j]),
+                        size=[height_output, width_output],
+                        mode='bilinear', align_corners=True)
+                else:
+                    y = y + self.fuse_layers[i][j](x[j])
+            x_fuse.append(self.relu(y))
+        return x_fuse
+class HRNetW32(nn.Module):
+    """
+    HRNet-W32 for Cephalometric Landmark Detection
+    Input: 768x768 grayscale/RGB image
+    Output: 19 landmark heatmaps (192x192)
+    """
+    # NOTE(review): conv1 takes 3 input channels, so a grayscale image has to
+    # be expanded to 3 channels before it reaches the network.
+    def __init__(self, num_landmarks=19):
+        # num_landmarks: number of output heatmap channels
+        super(HRNetW32, self).__init__()
+        self.num_landmarks = num_landmarks
+        # Stem: two stride-2 3x3 convs (1/4 of the input resolution overall)
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        # Stage 1: four Bottleneck blocks, 64 -> 256 channels
+        self.layer1 = self._make_layer(Bottleneck, 64, 64, 4)
+        # Stage 2: one module with two branches
+        self.stage2_cfg = {
+            'NUM_MODULES': 1,
+            'NUM_BRANCHES': 2,
+            'NUM_BLOCKS': [4, 4],
+            'NUM_CHANNELS': [32, 64],
+            'BLOCK': BasicBlock,
+            'FUSE_METHOD': 'SUM'
+        }
+        num_channels = self.stage2_cfg['NUM_CHANNELS']
+        block = self.stage2_cfg['BLOCK']
+        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+        # [256] is the channel count leaving stage 1 (64 * Bottleneck.expansion)
+        self.transition1 = self._make_transition_layer([256], num_channels)
+        self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)
+        # Stage 3: four modules with three branches
+        self.stage3_cfg = {
+            'NUM_MODULES': 4,
+            'NUM_BRANCHES': 3,
+            'NUM_BLOCKS': [4, 4, 4],
+            'NUM_CHANNELS': [32, 64, 128],
+            'BLOCK': BasicBlock,
+            'FUSE_METHOD': 'SUM'
+        }
+        num_channels = self.stage3_cfg['NUM_CHANNELS']
+        block = self.stage3_cfg['BLOCK']
+        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+        self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
+        self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)
+        # Stage 4: three modules with four branches
+        self.stage4_cfg = {
+            'NUM_MODULES': 3,
+            'NUM_BRANCHES': 4,
+            'NUM_BLOCKS': [4, 4, 4, 4],
+            'NUM_CHANNELS': [32, 64, 128, 256],
+            'BLOCK': BasicBlock,
+            'FUSE_METHOD': 'SUM'
+        }
+        num_channels = self.stage4_cfg['NUM_CHANNELS']
+        block = self.stage4_cfg['BLOCK']
+        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+        self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
+        # All four outputs are kept because forward() concatenates them
+        self.stage4, pre_stage_channels = self._make_stage(self.stage4_cfg, num_channels, multi_scale_output=True)
+        # Head: 1x1 convs over the concatenation of all stage-4 branches
+        last_inp_channels = sum(pre_stage_channels)
+        self.head = nn.Sequential(
+            nn.Conv2d(last_inp_channels, last_inp_channels, kernel_size=1, stride=1, padding=0),
+            nn.BatchNorm2d(last_inp_channels),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(last_inp_channels, num_landmarks, kernel_size=1, stride=1, padding=0)
+        )
+    def _make_layer(self, block, inplanes, planes, blocks, stride=1):
+        # Stack `blocks` residual blocks; the first one gets a 1x1 conv + BN
+        # projection on its skip path when stride or channel count changes.
+        downsample = None
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(inplanes, planes * block.expansion,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(planes * block.expansion),
+            )
+        layers = []
+        layers.append(block(inplanes, planes, stride, downsample))
+        inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(inplanes, planes))
+        return nn.Sequential(*layers)
+    def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
+        # Build one adapter per branch of the next stage. An entry is None
+        # when an existing branch already has the right channel count.
+        num_branches_cur = len(num_channels_cur_layer)
+        num_branches_pre = len(num_channels_pre_layer)
+        transition_layers = []
+        for i in range(num_branches_cur):
+            if i < num_branches_pre:
+                # Existing branch: 3x3 conv only when the channel count changes
+                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+                    transition_layers.append(nn.Sequential(
+                        nn.Conv2d(num_channels_pre_layer[i], num_channels_cur_layer[i], 3, 1, 1, bias=False),
+                        nn.BatchNorm2d(num_channels_cur_layer[i]),
+                        nn.ReLU(inplace=True)))
+                else:
+                    transition_layers.append(None)
+            else:
+                # New branch: stride-2 3x3 convs fed from the last previous
+                # branch; the final conv sets the new channel count.
+                conv3x3s = []
+                for j in range(i + 1 - num_branches_pre):
+                    inchannels = num_channels_pre_layer[-1]
+                    outchannels = num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
+                    conv3x3s.append(nn.Sequential(
+                        nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
+                        nn.BatchNorm2d(outchannels),
+                        nn.ReLU(inplace=True)))
+                transition_layers.append(nn.Sequential(*conv3x3s))
+        return nn.ModuleList(transition_layers)
+    def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
+        # Chain NUM_MODULES HighResolutionModules; returns the stage and the
+        # per-branch channel counts it ends with.
+        num_modules = layer_config['NUM_MODULES']
+        num_branches = layer_config['NUM_BRANCHES']
+        num_blocks = layer_config['NUM_BLOCKS']
+        num_channels = layer_config['NUM_CHANNELS']
+        block = layer_config['BLOCK']
+        fuse_method = layer_config['FUSE_METHOD']
+        modules = []
+        for i in range(num_modules):
+            # Only the last module may drop to a single-scale output
+            if not multi_scale_output and i == num_modules - 1:
+                reset_multi_scale_output = False
+            else:
+                reset_multi_scale_output = True
+            modules.append(
+                HighResolutionModule(num_branches, block, num_blocks, num_inchannels,
+                                     num_channels, fuse_method, reset_multi_scale_output)
+            )
+            num_inchannels = modules[-1].get_num_inchannels()
+        return nn.Sequential(*modules), num_inchannels
+    def forward(self, x):
+        # x: image batch with 3 channels; returns num_landmarks heatmaps at
+        # the resolution of the first (highest-resolution) branch.
+        # Stem
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.bn2(x)
+        x = self.relu(x)
+        # Stage 1
+        x = self.layer1(x)
+        # Stage 2: every branch is derived from the single stage-1 output
+        x_list = []
+        for i in range(self.stage2_cfg['NUM_BRANCHES']):
+            if self.transition1[i] is not None:
+                x_list.append(self.transition1[i](x))
+            else:
+                x_list.append(x)
+        y_list = self.stage2(x_list)
+        # Stage 3: existing branches pass through; the new branch is derived
+        # from the last output of the previous stage
+        x_list = []
+        for i in range(self.stage3_cfg['NUM_BRANCHES']):
+            if self.transition2[i] is not None:
+                if i < self.stage2_cfg['NUM_BRANCHES']:
+                    x_list.append(self.transition2[i](y_list[i]))
+                else:
+                    x_list.append(self.transition2[i](y_list[-1]))
+            else:
+                x_list.append(y_list[i])
+        y_list = self.stage3(x_list)
+        # Stage 4: same scheme as stage 3
+        x_list = []
+        for i in range(self.stage4_cfg['NUM_BRANCHES']):
+            if self.transition3[i] is not None:
+                if i < self.stage3_cfg['NUM_BRANCHES']:
+                    x_list.append(self.transition3[i](y_list[i]))
+                else:
+                    x_list.append(self.transition3[i](y_list[-1]))
+            else:
+                x_list.append(y_list[i])
+        x = self.stage4(x_list)
+        # Upscale every branch to the highest resolution and concatenate
+        x0_h, x0_w = x[0].size(2), x[0].size(3)
+        x1 = F.interpolate(x[1], size=(x0_h, x0_w), mode='bilinear', align_corners=True)
+        x2 = F.interpolate(x[2], size=(x0_h, x0_w), mode='bilinear', align_corners=True)
+        x3 = F.interpolate(x[3], size=(x0_h, x0_w), mode='bilinear', align_corners=True)
+        x = torch.cat([x[0], x1, x2, x3], 1)
+        # Head
+        x = self.head(x)
+        return x
+def get_hrnet_w32(num_landmarks=19):
+    """Create HRNet-W32 model for cephalometric landmark detection"""
+    # Returns a freshly constructed network; no weights are loaded here
+    model = HRNetW32(num_landmarks=num_landmarks)
+    return model

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+# Core ML
+torch>=2.0.0
+torchvision>=0.15.0
+# Hugging Face
+huggingface_hub>=0.19.0
+gradio>=4.0.0
+# Image processing
+Pillow>=10.0.0
+numpy>=1.24.0
+# Optional but useful
+scipy>=1.10.0