import json
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import (
    BatchNormalization, Conv2D, Dense, Dropout,
    GlobalAveragePooling2D, MaxPooling2D, RandomRotation, RandomTranslation, RandomZoom
)
from tensorflow.keras.models import Sequential

# Project layout: the project root is the parent of the directory holding this
# script; processed arrays live in <root>/dataset/processed and model
# artifacts in <root>/model.
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
PROCESSED_DIR = os.path.join(ROOT_DIR, 'dataset', 'processed')
MODEL_DIR = os.path.join(ROOT_DIR, 'model')

# Use all CPU cores. The core count is detected at runtime rather than being
# hard-coded, so the thread pools match the machine the script runs on.
# os.cpu_count() may return None; 0 then tells TensorFlow to pick a suitable
# number of threads on its own.
NUM_THREADS = os.cpu_count() or 0
tf.config.threading.set_intra_op_parallelism_threads(NUM_THREADS)
tf.config.threading.set_inter_op_parallelism_threads(NUM_THREADS)

os.makedirs(MODEL_DIR, exist_ok=True)

# Load the pre-split training and validation arrays.
X = np.load(os.path.join(PROCESSED_DIR, 'X_train.npy'))
y = np.load(os.path.join(PROCESSED_DIR, 'y_train.npy'))
X_val = np.load(os.path.join(PROCESSED_DIR, 'X_val.npy'))
y_val = np.load(os.path.join(PROCESSED_DIR, 'y_val.npy'))

# Quick sanity report on the training inputs.
print(f"Data range: {X.min():.3f} to {X.max():.3f}")
print(f"Data shape: {X.shape}")
print(f"Data type: {X.dtype}")

# The number of output classes is the number of entries in classes.json.
with open(os.path.join(MODEL_DIR, 'classes.json'), "r", encoding="utf-8") as f:
    class_mapping = json.load(f)
num_classes = len(class_mapping)

# The training arrays are used as loaded; no further split is made here.
X_train, y_train = X, y


def build_model(input_shape=(28, 28, 1), n_classes=20):
    """Assemble the (uncompiled) convolutional classifier.

    Args:
        input_shape: Shape of a single input sample, without the batch axis.
        n_classes: Number of units in the final softmax layer.

    Returns:
        A ``Sequential`` model: augmentation layers, four convolutional
        stages, two hidden dense layers and a softmax output.
    """
    conv_kwargs = dict(padding='same', activation='relu', kernel_initializer='he_normal')

    stack = [
        tf.keras.layers.Input(shape=input_shape),
        # Data augmentation - active only during training, disabled at inference
        RandomRotation(0.15),
        RandomTranslation(0.1, 0.1),
        RandomZoom(0.1),
    ]

    # Three conv stages that each halve the spatial size via max pooling
    # (28x28 -> 14x14 -> 7x7 -> 3x3 for the default input shape).
    for n_filters, drop_rate in ((32, 0.25), (64, 0.3), (128, 0.3)):
        stack.extend([
            Conv2D(n_filters, (3, 3), **conv_kwargs),
            BatchNormalization(),
            MaxPooling2D(),
            Dropout(drop_rate),
        ])

    # Last conv stage: global average pooling takes the place of max pooling.
    stack.extend([
        Conv2D(256, (3, 3), **conv_kwargs),
        BatchNormalization(),
        GlobalAveragePooling2D(),
        Dropout(0.4),
    ])

    # Fully connected head, each hidden layer followed by dropout.
    for n_units, drop_rate in ((256, 0.4), (128, 0.3)):
        stack.extend([
            Dense(n_units, activation='relu', kernel_initializer='he_normal'),
            Dropout(drop_rate),
        ])

    stack.append(Dense(n_classes, activation='softmax'))
    return Sequential(stack)


# Build the network with one softmax output per entry in the class mapping.
model = build_model(input_shape=(28, 28, 1), n_classes=num_classes)

# Labels are fed as integer class ids, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)
model.summary()

# Checkpoint keeps the file with the highest validation accuracy; early
# stopping and the learning-rate schedule both watch validation loss.
best_model_path = os.path.join(MODEL_DIR, 'best_model.keras')
callbacks = [
    ModelCheckpoint(
        best_model_path,
        monitor="val_accuracy",
        mode="max",
        save_best_only=True,
        verbose=1,
    ),
    EarlyStopping(
        monitor="val_loss",
        patience=15,
        restore_best_weights=True,
        verbose=1,
    ),
    ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=7,
        min_lr=1e-6,
        verbose=1,
    ),
]

print("Starting training...")
fit_options = {
    "validation_data": (X_val, y_val),
    "batch_size": 256,
    "epochs": 80,
    "callbacks": callbacks,
    "verbose": 1,
}
history = model.fit(X_train, y_train, **fit_options)

# Report the highest validation accuracy seen in any epoch.
final_val_acc = max(history.history['val_accuracy'])
print(f"Best validation accuracy: {final_val_acc:.4f} ({final_val_acc * 100:.2f}%)")