Spaces:

Klinapps
/

cephalometric-landmark-detection

Runtime error

App Files Files Community

Klinapps committed on Jan 30

Commit

11e7d47

verified ·

1 Parent(s): 17ade45

Upload 2 files

Browse files

Files changed (2) hide show

app.py +295 -320
requirements.txt +0 -8

app.py CHANGED Viewed

@@ -1,105 +1,272 @@
 """
 Cephalometric Landmark Detection API
-HRNet-W32 based automatic landmark detection for lateral cephalometric radiographs
-Space para integración con Klinafy
 """
 import os
 import json
 import numpy as np
-from PIL import Image
 import torch
-import torch.nn.functional as F
 from huggingface_hub import hf_hub_download
 import gradio as gr
-from hrnet import get_hrnet_w32
 # ============================================================================
 # CONFIGURACIÓN
 # ============================================================================
 MODEL_REPO = "cwlachap/hrnet-cephalometric-landmark-detection"
 MODEL_FILE = "best_model.pth"
-INPUT_SIZE = 768
-HEATMAP_SIZE = 192
 NUM_LANDMARKS = 19
-# Nombres de los 19 landmarks en orden del modelo
 LANDMARK_NAMES = [
-    "S",      # 0 - Sella turcica
-    "N",      # 1 - Nasion
-    "Or",     # 2 - Orbitale
-    "Po",     # 3 - Porion
-    "Ba",     # 4 - Basion
-    "Pt",     # 5 - Pterygoid point
-    "ANS",    # 6 - Anterior Nasal Spine
-    "PNS",    # 7 - Posterior Nasal Spine
-    "A",      # 8 - Point A (Subspinale)
-    "U1T",    # 9 - Upper Incisor Tip
-    "U1R",    # 10 - Upper Incisor Root
-    "L1T",    # 11 - Lower Incisor Tip
-    "L1R",    # 12 - Lower Incisor Root
-    "B",      # 13 - Point B (Supramentale)
-    "Pog",    # 14 - Pogonion
-    "Gn",     # 15 - Gnathion
-    "Me",     # 16 - Menton
-    "Go",     # 17 - Gonion
-    "Ar"      # 18 - Articulare
 ]
-# Colores para visualización (RGB)
 LANDMARK_COLORS = {
-    "cranial": (255, 0, 0),      # Rojo - S, N, Ba, Ar
-    "orbital": (0, 255, 0),       # Verde - Or, Po
-    "maxilar": (0, 0, 255),       # Azul - ANS, PNS, A, Pt
-    "dental": (255, 255, 0),      # Amarillo - U1T, U1R, L1T, L1R
-    "mandibular": (255, 0, 255)   # Magenta - B, Pog, Gn, Me, Go
 }
-LANDMARK_GROUPS = {
-    "S": "cranial", "N": "cranial", "Ba": "cranial", "Ar": "cranial",
-    "Or": "orbital", "Po": "orbital",
-    "ANS": "maxilar", "PNS": "maxilar", "A": "maxilar", "Pt": "maxilar",
-    "U1T": "dental", "U1R": "dental", "L1T": "dental", "L1R": "dental",
-    "B": "mandibular", "Pog": "mandibular", "Gn": "mandibular",
-    "Me": "mandibular", "Go": "mandibular"
-}
 # ============================================================================
-# MODELO
 # ============================================================================
-# Variable global para el modelo
 model = None
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 def load_model():
-    """Carga el modelo HRNet desde Hugging Face Hub"""
     global model
     if model is not None:
         return model
     print(f"Cargando modelo en {device}...")
-    # Descargar pesos
-    model_path = hf_hub_download(
-        repo_id=MODEL_REPO,
-        filename=MODEL_FILE
-    )
-    # Crear modelo
-    model = get_hrnet_w32(num_landmarks=NUM_LANDMARKS)
-    # Cargar pesos
     checkpoint = torch.load(model_path, map_location=device, weights_only=False)
-    # Manejar diferentes formatos de checkpoint
     if 'model_state_dict' in checkpoint:
         state_dict = checkpoint['model_state_dict']
     elif 'state_dict' in checkpoint:
@@ -107,339 +274,147 @@ def load_model():
     else:
         state_dict = checkpoint
-    # Limpiar prefijos si existen
     new_state_dict = {}
     for k, v in state_dict.items():
-        name = k.replace('module.', '')  # Remover prefijo de DataParallel
         new_state_dict[name] = v
-    model.load_state_dict(new_state_dict, strict=False)
     model.to(device)
     model.eval()
-    print("Modelo cargado exitosamente!")
     return model
-# ============================================================================
-# PREPROCESAMIENTO
-# ============================================================================
-def preprocess_image(image):
-    """
-    Preprocesa la imagen para el modelo
-    Args:
-        image: PIL Image o numpy array
-    Returns:
-        tensor: Tensor normalizado [1, 3, 768, 768]
-        original_size: (width, height) original
-    """
-    # Convertir a PIL si es necesario
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    # Guardar tamaño original
-    original_size = image.size  # (width, height)
-    # Convertir a RGB si es necesario
     if image.mode != 'RGB':
         image = image.convert('RGB')
-    # Redimensionar a 768x768
-    image = image.resize((INPUT_SIZE, INPUT_SIZE), Image.Resampling.BILINEAR)
-    # Convertir a tensor
-    img_array = np.array(image).astype(np.float32) / 255.0
-    # Normalizar con ImageNet stats
     mean = np.array([0.485, 0.456, 0.406])
     std = np.array([0.229, 0.224, 0.225])
     img_array = (img_array - mean) / std
-    # Cambiar a formato CHW y agregar batch dimension
-    img_tensor = torch.from_numpy(img_array.transpose(2, 0, 1)).float()
-    img_tensor = img_tensor.unsqueeze(0)
-    return img_tensor, original_size
-# ============================================================================
-# POSTPROCESAMIENTO
-# ============================================================================
-def get_max_preds(heatmaps):
-    """
-    Obtiene las coordenadas del máximo de cada heatmap
-    Args:
-        heatmaps: tensor [batch, num_landmarks, H, W]
-    Returns:
-        preds: coordenadas [batch, num_landmarks, 2]
-        maxvals: valores de confianza [batch, num_landmarks, 1]
-    """
-    batch_size = heatmaps.shape[0]
-    num_joints = heatmaps.shape[1]
-    width = heatmaps.shape[3]
-    heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
-    idx = np.argmax(heatmaps_reshaped, axis=2)
-    maxvals = np.amax(heatmaps_reshaped, axis=2)
-    maxvals = maxvals.reshape((batch_size, num_joints, 1))
-    idx = idx.reshape((batch_size, num_joints, 1))
-    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
-    preds[:, :, 0] = (preds[:, :, 0]) % width
-    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
-    return preds, maxvals
-def heatmaps_to_landmarks(heatmaps, original_size):
-    """
-    Convierte heatmaps a coordenadas de landmarks
-    Args:
-        heatmaps: tensor [1, 19, H, W]
-        original_size: (width, height) de la imagen original
-    Returns:
-        landmarks: lista de dicts con name, x, y, confidence
-    """
-    heatmaps_np = heatmaps.cpu().numpy()
-    # Obtener coordenadas del máximo
-    preds, maxvals = get_max_preds(heatmaps_np)
-    # Escalar a tamaño original
     orig_w, orig_h = original_size
-    heatmap_h, heatmap_w = heatmaps_np.shape[2], heatmaps_np.shape[3]
     scale_x = orig_w / heatmap_w
     scale_y = orig_h / heatmap_h
     landmarks = []
     for i in range(NUM_LANDMARKS):
-        x = float(preds[0, i, 0] * scale_x)
-        y = float(preds[0, i, 1] * scale_y)
-        conf = float(maxvals[0, i, 0])
         landmarks.append({
             "name": LANDMARK_NAMES[i],
-            "x": round(x, 2),
-            "y": round(y, 2),
-            "confidence": round(conf, 4),
-            "group": LANDMARK_GROUPS[LANDMARK_NAMES[i]]
         })
-    return landmarks
-# ============================================================================
-# INFERENCIA
-# ============================================================================
-def detect_landmarks(image):
-    """
-    Detecta landmarks cefalométricos en una imagen
-    Args:
-        image: PIL Image o numpy array
-    Returns:
-        landmarks: lista de dicts con name, x, y, confidence
-        annotated_image: imagen con landmarks dibujados
-    """
-    # Cargar modelo si no está cargado
-    model = load_model()
-    # Preprocesar
-    img_tensor, original_size = preprocess_image(image)
-    img_tensor = img_tensor.to(device)
-    # Inferencia
-    with torch.no_grad():
-        heatmaps = model(img_tensor)
-    # Postprocesar
-    landmarks = heatmaps_to_landmarks(heatmaps, original_size)
-    # Crear imagen anotada
     annotated = draw_landmarks(image, landmarks)
     return landmarks, annotated
-def draw_landmarks(image, landmarks, radius=5):
-    """
-    Dibuja los landmarks en la imagen
-    Args:
-        image: PIL Image o numpy array
-        landmarks: lista de dicts con coordenadas
-        radius: radio del círculo
-    Returns:
-        PIL Image con landmarks dibujados
-    """
-    from PIL import ImageDraw, ImageFont
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    # Crear copia para dibujar
-    img_draw = image.copy()
-    if img_draw.mode != 'RGB':
-        img_draw = img_draw.convert('RGB')
     draw = ImageDraw.Draw(img_draw)
-    # Intentar cargar fuente, usar default si falla
     try:
-        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 12)
     except:
         font = ImageFont.load_default()
     for lm in landmarks:
         x, y = lm['x'], lm['y']
-        color = LANDMARK_COLORS[lm['group']]
-        # Dibujar círculo
-        draw.ellipse([x-radius, y-radius, x+radius, y+radius],
-                     fill=color, outline=(255, 255, 255))
-        # Dibujar nombre
-        draw.text((x+radius+2, y-radius), lm['name'],
-                  fill=(255, 255, 255), font=font,
-                  stroke_width=1, stroke_fill=(0, 0, 0))
     return img_draw
 # ============================================================================
 # INTERFAZ GRADIO
 # ============================================================================
 def process_image(image):
-    """Función principal para Gradio"""
     if image is None:
-        return None, "Por favor sube una imagen cefalométrica"
     try:
         landmarks, annotated = detect_landmarks(image)
-        # Formatear JSON para mostrar
-        json_output = json.dumps({
-            "success": True,
-            "num_landmarks": len(landmarks),
-            "landmarks": landmarks
-        }, indent=2)
-        return annotated, json_output
-    except Exception as e:
-        return None, json.dumps({
-            "success": False,
-            "error": str(e)
-        }, indent=2)
-def api_predict(image):
-    """Endpoint API para integración con Klinafy"""
-    if image is None:
-        return {"success": False, "error": "No image provided"}
-    try:
-        landmarks, _ = detect_landmarks(image)
-        return {
             "success": True,
-            "model": "HRNet-W32",
             "num_landmarks": len(landmarks),
             "landmarks": landmarks
         }
     except Exception as e:
-        return {
-            "success": False,
-            "error": str(e)
-        }
-# ============================================================================
-# CREAR APP
-# ============================================================================
-# Cargar modelo al inicio
-print("Inicializando modelo...")
 load_model()
-# Crear interfaz Gradio
-with gr.Blocks(title="Cephalometric Landmark Detection") as demo:
-    gr.Markdown("""
-    # 🦷 Detección de Landmarks Cefalométricos
-    Detección automática de **19 puntos cefalométricos** usando HRNet-W32.
-    ### Landmarks detectados:
-    - **Craneales** (rojo): S, N, Ba, Ar
-    - **Orbitales** (verde): Or, Po
-    - **Maxilares** (azul): ANS, PNS, A, Pt
-    - **Dentales** (amarillo): U1T, U1R, L1T, L1R
-    - **Mandibulares** (magenta): B, Pog, Gn, Me, Go
-    ---
-    """)
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(
-                label="Radiografía Cefalométrica Lateral",
-                type="pil"
-            )
-            detect_btn = gr.Button("🔍 Detectar Landmarks", variant="primary")
         with gr.Column():
-            output_image = gr.Image(
-                label="Imagen con Landmarks"
-            )
-            output_json = gr.Code(
-                label="Coordenadas (JSON)",
-                language="json"
-            )
-    detect_btn.click(
-        fn=process_image,
-        inputs=[input_image],
-        outputs=[output_image, output_json]
-    )
-    gr.Markdown("""
-    ---
-    ### 📡 API Endpoint
-    Para integración programática (ej. Klinafy):
-    ```javascript
-    const response = await fetch('https://YOUR-SPACE.hf.space/api/predict', {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ data: [base64Image] })
-    });
-    const result = await response.json();
-    // result.data[0] = { success: true, landmarks: [...] }
-    ```
-    ---
-    **Modelo**: HRNet-W32 | **Precisión**: MRE ~1.5mm | **Licencia**: MIT
-    """)
-# Habilitar API
 demo.queue()
-iif __name__ == "__main__":
     demo.launch(ssr_mode=False)

 """
 Cephalometric Landmark Detection API
+HRNet-W32 para 19 landmarks cefalométricos
 """
 import os
 import json
 import numpy as np
+from PIL import Image, ImageDraw, ImageFont
 import torch
+import torch.nn as nn
 from huggingface_hub import hf_hub_download
 import gradio as gr
 # ============================================================================
 # CONFIGURACIÓN
 # ============================================================================
 MODEL_REPO = "cwlachap/hrnet-cephalometric-landmark-detection"
 MODEL_FILE = "best_model.pth"
 NUM_LANDMARKS = 19
+INPUT_SIZE = 768
 LANDMARK_NAMES = [
+    "S", "N", "Or", "Po", "Ba", "Pt", "ANS", "PNS", "A",
+    "U1T", "U1R", "L1T", "L1R", "B", "Pog", "Gn", "Me", "Go", "Ar"
 ]
 LANDMARK_COLORS = {
+    'S': (255, 0, 0), 'N': (255, 0, 0), 'Ba': (255, 0, 0), 'Ar': (255, 0, 0),
+    'Or': (0, 255, 0), 'Po': (0, 255, 0),
+    'ANS': (0, 100, 255), 'PNS': (0, 100, 255), 'A': (0, 100, 255), 'Pt': (0, 100, 255),
+    'U1T': (255, 255, 0), 'U1R': (255, 255, 0), 'L1T': (255, 255, 0), 'L1R': (255, 255, 0),
+    'B': (255, 0, 255), 'Pog': (255, 0, 255), 'Gn': (255, 0, 255),
+    'Me': (255, 0, 255), 'Go': (255, 0, 255)
 }
+# ============================================================================
+# ARQUITECTURA HRNET-W32
+# ============================================================================
+BN_MOMENTUM = 0.1
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with one pixel of padding.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (defaults to 1).

    Returns:
        An ``nn.Conv2d`` with ``kernel_size=3``, ``padding=1`` and ``bias=False``.
    """
    conv_kwargs = {"kernel_size": 3, "stride": stride, "padding": 1, "bias": False}
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut.

    Args:
        inplanes: Number of input channels.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to build the
            shortcut; when ``None`` the input itself is the shortcut.
    """

    # Output channels are ``planes * expansion``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Attribute names are part of the checkpoint key layout; keep them.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        shortcut = x if self.downsample is None else self.downsample(x)
        out = out + shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    Args:
        inplanes: Number of input channels.
        planes: Width of the two inner convolutions; the block outputs
            ``planes * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to build the
            shortcut; when ``None`` the input itself is the shortcut.
    """

    # The final 1x1 convolution widens the output to ``planes * expansion``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * self.expansion
        # Attribute names are part of the checkpoint key layout; keep them.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        shortcut = x if self.downsample is None else self.downsample(x)
        out = out + shortcut
        return self.relu(out)
class HighResolutionModule(nn.Module):
    """Run parallel branches at several resolutions and fuse them by summation.

    Args:
        num_branches: Number of parallel branches.
        blocks: Block class used in every branch. It must expose an
            ``expansion`` attribute and accept
            ``(inplanes, planes, stride, downsample)``.
        num_blocks: Per-branch number of stacked blocks.
        num_inchannels: Per-branch input channel counts. The sequence is
            copied, so the caller's list is never modified.
        num_channels: Per-branch block width (before ``expansion``).
        fuse_method: Kept on the instance for reference only; ``forward``
            always fuses by element-wise sum.
        multi_scale_output: When ``False`` only the fused output for
            branch 0 is produced.
    """

    def __init__(self, num_branches, blocks, num_blocks, num_inchannels, num_channels, fuse_method, multi_scale_output=True):
        super().__init__()
        # Private copy: _make_one_branch rewrites entries of this list, and
        # the caller's list must not change behind its back.
        self.num_inchannels = list(num_inchannels)
        self.num_branches = num_branches
        self.fuse_method = fuse_method
        self.multi_scale_output = multi_scale_output
        self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
        self.fuse_layers = self._make_fuse_layers()
        self.relu = nn.ReLU(True)

    def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
        """Stack ``num_blocks[branch_index]`` blocks for a single branch."""
        in_ch = self.num_inchannels[branch_index]
        out_ch = num_channels[branch_index] * block.expansion
        downsample = None
        if stride != 1 or in_ch != out_ch:
            # Project the shortcut so it matches the block output.
            downsample = nn.Sequential(
                nn.Conv2d(in_ch, out_ch, 1, stride, bias=False),
                nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM))
        layers = [block(in_ch, num_channels[branch_index], stride, downsample)]
        # After the first block the branch carries ``out_ch`` channels.
        self.num_inchannels[branch_index] = out_ch
        layers.extend(
            block(out_ch, num_channels[branch_index])
            for _ in range(1, num_blocks[branch_index]))
        return nn.Sequential(*layers)

    def _make_branches(self, num_branches, block, num_blocks, num_channels):
        """Build one block stack per branch."""
        return nn.ModuleList([
            self._make_one_branch(i, block, num_blocks, num_channels)
            for i in range(num_branches)])

    def _make_fuse_layers(self):
        """Build the adapters that bring branch ``j`` to branch ``i``'s shape.

        Entry ``[i][j]`` is ``None`` when ``i == j``, a 1x1 conv + upsample
        when ``j > i`` and a chain of stride-2 3x3 convs when ``j < i``.
        Returns ``None`` for a single-branch module.
        """
        if self.num_branches == 1:
            return None
        channels = self.num_inchannels
        num_outputs = self.num_branches if self.multi_scale_output else 1
        fuse_layers = []
        for i in range(num_outputs):
            fuse_layer = []
            for j in range(self.num_branches):
                if j > i:
                    fuse_layer.append(nn.Sequential(
                        nn.Conv2d(channels[j], channels[i], 1, bias=False),
                        nn.BatchNorm2d(channels[i]),
                        nn.Upsample(scale_factor=2 ** (j - i), mode='nearest')))
                elif j == i:
                    fuse_layer.append(None)
                else:
                    steps = i - j
                    downs = []
                    for k in range(steps):
                        is_last = k == steps - 1
                        # Only the last step changes the channel count.
                        out_ch = channels[i] if is_last else channels[j]
                        downs.append(nn.Sequential(
                            nn.Conv2d(channels[j], out_ch, 3, 2, 1, bias=False),
                            nn.BatchNorm2d(out_ch),
                            nn.Identity() if is_last else nn.ReLU(True)))
                    fuse_layer.append(nn.Sequential(*downs))
            fuse_layers.append(nn.ModuleList(fuse_layer))
        return nn.ModuleList(fuse_layers)

    def get_num_inchannels(self):
        """Return the per-branch channel counts after the branch stacks."""
        return self.num_inchannels

    def forward(self, x):
        """Apply every branch, then sum the adapted branch outputs.

        Args:
            x: Sequence with one tensor per branch. It is not modified.

        Returns:
            A list of fused tensors (one per output branch).
        """
        if self.num_branches == 1:
            return [self.branches[0](x[0])]
        # Build a new list instead of overwriting the caller's entries.
        feats = [self.branches[i](x[i]) for i in range(self.num_branches)]
        fused = []
        for i, fuse_row in enumerate(self.fuse_layers):
            y = feats[0] if i == 0 else fuse_row[0](feats[0])
            for j in range(1, self.num_branches):
                y = y + (feats[j] if i == j else fuse_row[j](feats[j]))
            fused.append(self.relu(y))
        return fused
+blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck}
class HRNet(nn.Module):
    """HRNet backbone with a 1x1 convolution head producing one map per joint.

    The stem is two stride-2 3x3 convolutions, followed by a Bottleneck
    layer and three multi-branch stages (2, 3 and 4 branches with 32/64/128/256
    channels). Only the first branch of the last stage feeds the head.

    Args:
        num_joints: Number of output channels of the final layer.
    """

    def __init__(self, num_joints=19):
        super().__init__()
        self.inplanes = 64
        # Attribute names below are part of the checkpoint key layout.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(Bottleneck, 64, 4)

        self.stage2_cfg = {'NUM_MODULES': 1, 'NUM_BRANCHES': 2, 'BLOCK': 'BASIC', 'NUM_BLOCKS': [4, 4], 'NUM_CHANNELS': [32, 64]}
        num_channels = [ch * BasicBlock.expansion for ch in self.stage2_cfg['NUM_CHANNELS']]
        # layer1 is a Bottleneck stack with planes=64, hence 256 channels out.
        self.transition1 = self._make_transition_layer([256], num_channels)
        self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)

        self.stage3_cfg = {'NUM_MODULES': 4, 'NUM_BRANCHES': 3, 'BLOCK': 'BASIC', 'NUM_BLOCKS': [4, 4, 4], 'NUM_CHANNELS': [32, 64, 128]}
        num_channels = [ch * BasicBlock.expansion for ch in self.stage3_cfg['NUM_CHANNELS']]
        self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
        self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)

        self.stage4_cfg = {'NUM_MODULES': 3, 'NUM_BRANCHES': 4, 'BLOCK': 'BASIC', 'NUM_BLOCKS': [4, 4, 4, 4], 'NUM_CHANNELS': [32, 64, 128, 256]}
        num_channels = [ch * BasicBlock.expansion for ch in self.stage4_cfg['NUM_CHANNELS']]
        self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
        self.stage4, pre_stage_channels = self._make_stage(self.stage4_cfg, num_channels, multi_scale_output=False)

        self.final_layer = nn.Conv2d(pre_stage_channels[0], num_joints, kernel_size=1, stride=1, padding=0)

    def _make_transition_layer(self, num_channels_pre, num_channels_cur):
        """Build per-branch adapters between two consecutive stages.

        An existing branch gets ``None`` when its channel count is unchanged
        and a 3x3 conv otherwise; each new branch gets a chain of stride-2
        3x3 convs fed from the last branch of the previous stage.
        """
        num_branches_cur = len(num_channels_cur)
        num_branches_pre = len(num_channels_pre)
        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur[i] != num_channels_pre[i]:
                    transition_layers.append(nn.Sequential(
                        nn.Conv2d(num_channels_pre[i], num_channels_cur[i], 3, 1, 1, bias=False),
                        nn.BatchNorm2d(num_channels_cur[i]), nn.ReLU(inplace=True)))
                else:
                    transition_layers.append(None)
            else:
                conv3x3s = []
                for j in range(i + 1 - num_branches_pre):
                    inchannels = num_channels_pre[-1]
                    # Only the last conv of the chain switches to the new width.
                    outchannels = num_channels_cur[i] if j == i - num_branches_pre else inchannels
                    conv3x3s.append(nn.Sequential(
                        nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
                        nn.BatchNorm2d(outchannels), nn.ReLU(inplace=True)))
                transition_layers.append(nn.Sequential(*conv3x3s))
        return nn.ModuleList(transition_layers)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` instances of ``block`` and update ``self.inplanes``."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, 1, stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM))
        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
        """Chain ``NUM_MODULES`` HighResolutionModules.

        Returns:
            ``(stage, num_inchannels)`` where ``num_inchannels`` is the channel
            list reported by the last module.
        """
        num_modules = layer_config['NUM_MODULES']
        num_branches = layer_config['NUM_BRANCHES']
        num_blocks = layer_config['NUM_BLOCKS']
        num_channels = layer_config['NUM_CHANNELS']
        block = blocks_dict[layer_config['BLOCK']]
        modules = []
        for i in range(num_modules):
            # Only the very last module may drop to single-scale output.
            reset_multi_scale = multi_scale_output or i < num_modules - 1
            modules.append(HighResolutionModule(num_branches, block, num_blocks, num_inchannels, num_channels, 'SUM', reset_multi_scale))
            num_inchannels = modules[-1].get_num_inchannels()
        return nn.Sequential(*modules), num_inchannels

    @staticmethod
    def _apply_transitions(transitions, features):
        """Feed the previous stage's outputs through the transition adapters.

        A ``None`` adapter passes the same-index feature through untouched;
        adapters for new branches read the last available feature.
        """
        last = len(features) - 1
        adapted = []
        for i, transition in enumerate(transitions):
            if transition is None:
                adapted.append(features[i])
            else:
                adapted.append(transition(features[min(i, last)]))
        return adapted

    def forward(self, x):
        """Return the output of ``final_layer`` for an image batch ``x``.

        NOTE(review): the stem expects 3 input channels; with even input sizes
        the two stride-2 convs leave the output at a quarter of the input
        resolution.
        """
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.layer1(x)
        y_list = self.stage2(self._apply_transitions(self.transition1, [x]))
        y_list = self.stage3(self._apply_transitions(self.transition2, y_list))
        y_list = self.stage4(self._apply_transitions(self.transition3, y_list))
        return self.final_layer(y_list[0])
 # ============================================================================
+# MODELO GLOBAL
 # ============================================================================
 model = None
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 def load_model():
     global model
     if model is not None:
         return model
     print(f"Cargando modelo en {device}...")
+    model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
     checkpoint = torch.load(model_path, map_location=device, weights_only=False)
     if 'model_state_dict' in checkpoint:
         state_dict = checkpoint['model_state_dict']
     elif 'state_dict' in checkpoint:
     else:
         state_dict = checkpoint
+    # Analizar estructura del checkpoint
+    print(f"Keys en checkpoint: {len(state_dict)}")
+    sample_keys = list(state_dict.keys())[:5]
+    print(f"Ejemplo de keys: {sample_keys}")
+    # Limpiar prefijos comunes
     new_state_dict = {}
     for k, v in state_dict.items():
+        name = k
+        for prefix in ['module.', 'backbone.', 'model.']:
+            if name.startswith(prefix):
+                name = name[len(prefix):]
         new_state_dict[name] = v
+    model = HRNet(num_joints=NUM_LANDMARKS)
+    try:
+        model.load_state_dict(new_state_dict, strict=True)
+        print("✓ Pesos cargados correctamente (strict=True)")
+    except Exception as e:
+        print(f"⚠ Carga estricta falló: {e}")
+        missing, unexpected = model.load_state_dict(new_state_dict, strict=False)
+        print(f"  - Keys faltantes: {len(missing)}")
+        print(f"  - Keys inesperadas: {len(unexpected)}")
     model.to(device)
     model.eval()
+    print("✓ Modelo listo!")
     return model
def get_max_preds(batch_heatmaps):
    """Locate the peak of every heatmap.

    Args:
        batch_heatmaps: Array of shape ``(batch, joints, height, width)``.

    Returns:
        A ``(preds, maxvals)`` pair: ``preds`` is a float32 array of shape
        ``(batch, joints, 2)`` holding ``(x, y)`` = (column, row) of each
        peak, and ``maxvals`` has shape ``(batch, joints, 1)`` with the peak
        values.
    """
    n_batch, n_joints, _, width = batch_heatmaps.shape
    flat = batch_heatmaps.reshape((n_batch, n_joints, -1))
    flat_idx = np.argmax(flat, axis=2)
    peak_vals = np.amax(flat, axis=2)
    # A flat index decomposes into (row, column) for a row-major map.
    rows, cols = np.divmod(flat_idx, width)
    coords = np.stack((cols, rows), axis=-1).astype(np.float32)
    return coords, peak_vals.reshape((n_batch, n_joints, 1))
def detect_landmarks(image):
    """Run the model on one image and return landmarks plus an annotated copy.

    Args:
        image: A PIL image or a NumPy array (converted with
            ``Image.fromarray``).

    Returns:
        ``(landmarks, annotated)`` where ``landmarks`` is a list of dicts
        with ``name``, ``x``, ``y`` (in original-image pixels) and
        ``confidence`` (the raw heatmap peak value), and ``annotated`` is the
        image returned by ``draw_landmarks``.
    """
    net = load_model()

    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    original_size = pil_image.size  # (width, height) before any resizing
    if pil_image.mode != 'RGB':
        pil_image = pil_image.convert('RGB')

    # Resize to the square network input and normalise per channel.
    resized = pil_image.resize((INPUT_SIZE, INPUT_SIZE), Image.Resampling.BILINEAR)
    pixels = np.array(resized).astype(np.float32) / 255.0
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    normalized = (pixels - mean) / std
    chw = normalized.transpose(2, 0, 1)  # HWC -> CHW
    batch = torch.from_numpy(chw).float().unsqueeze(0).to(device)

    with torch.no_grad():
        output = net(batch)
    heatmaps = output.cpu().numpy()
    preds, maxvals = get_max_preds(heatmaps)

    # Map heatmap coordinates back onto the original image.
    orig_w, orig_h = original_size
    scale_x = orig_w / heatmaps.shape[3]
    scale_y = orig_h / heatmaps.shape[2]

    landmarks = [
        {
            "name": LANDMARK_NAMES[i],
            "x": round(float(preds[0, i, 0] * scale_x), 2),
            "y": round(float(preds[0, i, 1] * scale_y), 2),
            "confidence": round(float(maxvals[0, i, 0]), 4)
        }
        for i in range(NUM_LANDMARKS)
    ]
    return landmarks, draw_landmarks(pil_image, landmarks)
def draw_landmarks(image, landmarks, radius=6):
    """Draw a coloured dot and a name label for every landmark.

    Args:
        image: A PIL image or a NumPy array (converted with
            ``Image.fromarray``). The input is not modified.
        landmarks: Iterable of dicts providing ``name``, ``x`` and ``y``.
        radius: Dot radius in pixels.

    Returns:
        An RGB copy of ``image`` with the landmarks drawn on it.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    img_draw = image.copy().convert('RGB')
    draw = ImageDraw.Draw(img_draw)
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 14)
    except (OSError, ImportError):
        # Font file missing or FreeType unavailable: fall back to the
        # built-in font without also swallowing KeyboardInterrupt/SystemExit.
        font = ImageFont.load_default()
    for lm in landmarks:
        x, y = lm['x'], lm['y']
        # Names without a configured colour are drawn in white.
        color = LANDMARK_COLORS.get(lm['name'], (255, 255, 255))
        draw.ellipse([x-radius, y-radius, x+radius, y+radius], fill=color, outline=(255, 255, 255), width=2)
        draw.text((x+radius+3, y-7), lm['name'], fill=(255, 255, 255), font=font, stroke_width=2, stroke_fill=(0, 0, 0))
    return img_draw
 # ============================================================================
 # INTERFAZ GRADIO
 # ============================================================================
 def process_image(image):
     if image is None:
+        return None, json.dumps({"error": "Por favor sube una imagen"}, indent=2)
     try:
         landmarks, annotated = detect_landmarks(image)
+        result = {
             "success": True,
             "num_landmarks": len(landmarks),
             "landmarks": landmarks
         }
+        return annotated, json.dumps(result, indent=2)
     except Exception as e:
+        import traceback
+        return None, json.dumps({"success": False, "error": str(e), "traceback": traceback.format_exc()}, indent=2)
+print("=" * 50)
+print("Inicializando Cephalometric Landmark Detection...")
+print("=" * 50)
 load_model()
+with gr.Blocks(title="Cephalometric Landmark Detection", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🦷 Detección de Landmarks Cefalométricos\n\nDetección automática de **19 puntos anatómicos** usando HRNet-W32.")
     with gr.Row():
         with gr.Column():
+            input_image = gr.Image(label="📤 Radiografía", type="pil", height=400)
+            detect_btn = gr.Button("🔍 Detectar", variant="primary", size="lg")
         with gr.Column():
+            output_image = gr.Image(label="📍 Resultado", height=400)
+            output_json = gr.Code(label="📋 JSON", language="json", lines=12)
+    detect_btn.click(fn=process_image, inputs=[input_image], outputs=[output_image, output_json])
 demo.queue()
+if __name__ == "__main__":
     demo.launch(ssr_mode=False)

requirements.txt CHANGED Viewed

@@ -1,14 +1,6 @@
-# Core ML
 torch>=2.0.0
 torchvision>=0.15.0
-# Hugging Face
 huggingface_hub>=0.19.0
 gradio>=4.0.0
-# Image processing
 Pillow>=10.0.0
 numpy>=1.24.0
-# Optional but useful
-scipy>=1.10.0

 torch>=2.0.0
 torchvision>=0.15.0
 huggingface_hub>=0.19.0
 gradio>=4.0.0
 Pillow>=10.0.0
 numpy>=1.24.0