import numpy as np

# Helper that draws random values used as initial weights.
def randn(*shape):
    """Return normally distributed values of the given shape, scaled down.

    Standard-normal samples are multiplied by
    ``sqrt(2 / (shape[0] * prod(shape[2:])))``.  For a convolution kernel laid
    out as (Out_C, In_C, K, K) the denominator is Out_C * K * K, i.e. a
    He-style variance scaling based on the first and the trailing dimensions
    (kept simple on purpose, for readability).

    Args:
        *shape: Dimensions of the array to create; at least one is required
            because ``shape[0]`` is read.

    Returns:
        A float ndarray with the requested shape.
    """
    # Draw first so the global RNG is consumed before anything else happens.
    samples = np.random.randn(*shape)
    # prod of an empty tail is 1, so 1-D and 2-D shapes scale by shape[0] only.
    scale_denominator = shape[0] * np.prod(shape[2:])
    std = np.sqrt(2.0 / scale_denominator)
    return samples * std

def randn_bias(*shape):
    """Return a float64 array of zeros with the given dimensions.

    Despite the name, no random numbers are drawn: biases start at zero.

    Args:
        *shape: Dimensions of the array to create.

    Returns:
        An ndarray of zeros with shape ``shape``.
    """
    dims = tuple(shape)
    return np.zeros(dims, dtype=np.float64)

class NumpyUNet:
    """Forward-only, two-level U-Net implemented with plain NumPy arrays.

    Channel plan: in_channels -> 64 -> 128 -> 256 (bottleneck) -> 128 -> 64
    -> out_classes.  Feature maps are unbatched and laid out as
    (channels, height, width).  All parameters are stored in ``self.weights``
    under names such as ``'enc1_w1'`` / ``'enc1_b1'``.
    """

    def __init__(self, in_channels=1, out_classes=2):
        """Create the weight dictionary for the hard-coded two-level U-Net.

        Args:
            in_channels: Number of channels of the input image.
            out_classes: Number of channels produced by the final 1x1 conv.
        """
        self.weights = {}

        # (name, input channels, output channels) of every double-conv block.
        # The decoder inputs are the upsampled features concatenated with the
        # matching encoder output: 256 + 128 = 384 and 128 + 64 = 192.
        blocks = (
            ('enc1', in_channels, 64),
            ('enc2', 64, 128),
            ('bottle', 128, 256),
            ('dec1', 256 + 128, 128),
            ('dec2', 128 + 64, 64),
        )
        for name, c_in, c_out in blocks:
            self.weights[f'{name}_w1'] = randn(c_out, c_in, 3, 3)
            self.weights[f'{name}_b1'] = randn_bias(c_out)
            self.weights[f'{name}_w2'] = randn(c_out, c_out, 3, 3)
            self.weights[f'{name}_b2'] = randn_bias(c_out)

        # Final 1x1 convolution mapping 64 features to the class channels.
        self.weights['final_w'] = randn(out_classes, 64, 1, 1)
        self.weights['final_b'] = randn_bias(out_classes)

    # --- Core U-Net operations ---

    def _relu(self, x):
        """Return ``x`` with negative entries replaced by zero."""
        return np.maximum(0, x)

    def _conv2d(self, x, kernel, bias, padding=1):
        """Stride-1 2D convolution (cross-correlation) with zero padding.

        Args:
            x: Input of shape (In_C, H, W).
            kernel: Weights of shape (Out_C, In_C, K_h, K_w).
            bias: One value per output channel, shape (Out_C,).
            padding: Zeros added on every side of both spatial axes.

        Returns:
            A float64 array of shape
            (Out_C, H + 2*padding - K_h + 1, W + 2*padding - K_w + 1).
            With an odd K and padding == (K - 1) // 2 this equals (Out_C, H, W),
            i.e. 'same' output size.

        Raises:
            ValueError: If the channel counts of ``x`` and ``kernel`` differ,
                or the kernel is larger than the padded input.
        """
        in_C, in_H, in_W = x.shape
        out_C, kernel_C, K_h, K_w = kernel.shape

        # Without this check a 1-channel operand would silently broadcast.
        if kernel_C != in_C:
            raise ValueError(
                f"kernel expects {kernel_C} input channels but x has {in_C}"
            )

        out_H = in_H + 2 * padding - K_h + 1
        out_W = in_W + 2 * padding - K_w + 1
        if out_H < 0 or out_W < 0:
            raise ValueError(
                f"kernel {K_h}x{K_w} does not fit input {in_H}x{in_W} "
                f"with padding {padding}"
            )

        padded_x = np.pad(
            x, ((0, 0), (padding, padding), (padding, padding)), 'constant'
        )

        output = np.zeros((out_C, out_H, out_W))
        # Loop over the few kernel offsets instead of every output pixel:
        # each offset contributes (Out_C, In_C) x (In_C, out_H, out_W).
        for di in range(K_h):
            for dj in range(K_w):
                window = padded_x[:, di:di + out_H, dj:dj + out_W]
                output += np.tensordot(
                    kernel[:, :, di, dj], window, axes=([1], [0])
                )

        output += np.asarray(bias).reshape(-1, 1, 1)
        return output

    def _max_pool2d(self, x, pool_size=2):
        """Non-overlapping max pooling over ``pool_size`` x ``pool_size`` tiles.

        Args:
            x: Input of shape (C, H, W).
            pool_size: Side length of the pooling window (also the stride).

        Returns:
            A float64 array of shape (C, H // pool_size, W // pool_size).
            Trailing rows/columns that do not fill a whole tile are dropped.
        """
        in_C, in_H, in_W = x.shape
        out_H = in_H // pool_size
        out_W = in_W // pool_size

        # Cut off the incomplete border, then expose each tile on its own axes.
        trimmed = x[:, :out_H * pool_size, :out_W * pool_size]
        tiles = trimmed.reshape(in_C, out_H, pool_size, out_W, pool_size)
        return tiles.max(axis=(2, 4)).astype(np.float64)

    def _upsample2d(self, x, scale=2):
        """Nearest-neighbour upsampling, used in place of a transposed conv.

        Every row and every column of the (C, H, W) input is repeated
        ``scale`` times, giving shape (C, H * scale, W * scale).
        """
        return x.repeat(scale, axis=1).repeat(scale, axis=2)

    def _conv_block(self, x, w1, b1, w2, b2):
        """Apply (3x3 conv with padding 1 -> ReLU) twice."""
        x = self._conv2d(x, w1, b1, padding=1)
        x = self._relu(x)
        x = self._conv2d(x, w2, b2, padding=1)
        x = self._relu(x)
        return x

    # --- U-Net forward pass ---

    def forward(self, x, *, verbose=True):
        """Run the U-Net forward pass.

        Args:
            x: Input of shape (In_C, H, W).  H and W must be multiples of 4
                because the map is halved twice and the upsampled decoder
                features are concatenated with the encoder outputs.
            verbose: When true (default), print the shape of every stage.

        Returns:
            Array of shape (out_classes, H, W) produced by the final 1x1 conv.

        Raises:
            ValueError: If ``x`` is not 3-dimensional, or H or W is not a
                multiple of 4.
        """
        w = self.weights

        def log(message):
            if verbose:
                print(message)

        # Fail early: otherwise the mismatch only surfaces at the skip
        # concatenation, after the expensive encoder has already run.
        _, height, width = x.shape
        if height % 4 or width % 4:
            raise ValueError(
                "input height and width must be multiples of 4, "
                f"got {height}x{width}"
            )

        log(f"Input: \t\t{x.shape}")

        # === 1. Encoder (contracting path) ===
        # Level 1
        e1 = self._conv_block(x, w['enc1_w1'], w['enc1_b1'], w['enc1_w2'], w['enc1_b2'])
        p1 = self._max_pool2d(e1)
        log(f"Encoder 1: \t{e1.shape} -> Pool: {p1.shape}")

        # Level 2
        e2 = self._conv_block(p1, w['enc2_w1'], w['enc2_b1'], w['enc2_w2'], w['enc2_b2'])
        p2 = self._max_pool2d(e2)
        log(f"Encoder 2: \t{e2.shape} -> Pool: {p2.shape}")

        # === 2. Bottleneck ===
        b = self._conv_block(p2, w['bottle_w1'], w['bottle_b1'], w['bottle_w2'], w['bottle_b2'])
        log(f"Bottleneck: \t{b.shape}")

        # === 3. Decoder (expanding path) ===
        # Skip connections are consumed deepest-first: e2, then e1.

        # Level 1
        u1 = self._upsample2d(b)
        s1 = e2
        c1 = np.concatenate((u1, s1), axis=0)  # join along the channel axis
        d1 = self._conv_block(c1, w['dec1_w1'], w['dec1_b1'], w['dec1_w2'], w['dec1_b2'])
        log(f"Decoder 1: \tUp: {u1.shape} + Skip: {s1.shape} = Concat: {c1.shape} -> Block: {d1.shape}")

        # Level 2
        u2 = self._upsample2d(d1)
        s2 = e1
        c2 = np.concatenate((u2, s2), axis=0)
        d2 = self._conv_block(c2, w['dec2_w1'], w['dec2_b1'], w['dec2_w2'], w['dec2_b2'])
        log(f"Decoder 2: \tUp: {u2.shape} + Skip: {s2.shape} = Concat: {c2.shape} -> Block: {d2.shape}")

        # === 4. Final 1x1 conv ===
        # Same routine as the 3x3 convs, but with K=1 no padding is needed.
        output = self._conv2d(d2, w['final_w'], w['final_b'], padding=0)
        log(f"Final 1x1 Conv: {output.shape}")

        return output

# --- Example run ---
if __name__ == "__main__":
    # Layout is (channels, height, width).  Height and width must be multiples
    # of 4: the network pools by 2 twice and concatenates each upsampled map
    # with the encoder output of the same level.
    # The image is kept small because the forward pass is slow.
    dummy_image = np.random.randn(1, 32, 32)

    # Build the model: 1 input channel, 2 output classes.
    model = NumpyUNet(in_channels=1, out_classes=2)

    print("--- U-Net Forward Pass Start ---")
    output_map = model.forward(dummy_image)  # run the forward pass
    print("--- U-Net Forward Pass End ---")

    print(f"\n최종 입력 이미지 Shape: {dummy_image.shape}")
    print(f"최종 출력 맵 Shape: {output_map.shape}")

    # Sanity check: spatial size is preserved, only the channel count changes.
    in_hw = dummy_image.shape[1:]
    out_channels, out_hw = output_map.shape[0], output_map.shape[1:]
    assert in_hw == out_hw
    assert out_channels == 2