import numexpr
import numpy as np
import sympy

def get_gradient_1d(function):
    """Return the symbolic first derivative of ``function`` with respect to ``x``.

    Args:
        function: Expression in the variable ``x`` in any form accepted by
            ``sympy.sympify`` (e.g. the string ``"x**2"``).

    Returns:
        The SymPy expression for d/dx of the parsed input.

    Note:
        ``sympy.sympify`` evaluates strings, so only pass trusted input.
    """
    return sympy.diff(sympy.sympify(function), sympy.symbols('x'))


def get_hessian_1d(function):
    """Return the symbolic second derivative of ``function`` with respect to ``x``.

    Args:
        function: Expression in the variable ``x`` in any form accepted by
            ``sympy.sympify`` (e.g. the string ``"x**3"``).

    Returns:
        The SymPy expression for d^2/dx^2 of the parsed input.
    """
    variable = sympy.symbols('x')
    parsed = sympy.sympify(function)
    # The trailing 2 asks SymPy to differentiate twice with respect to x.
    return sympy.diff(parsed, variable, 2)


def get_gradient_2d(function):
    """Return the symbolic partial derivatives of ``function`` in ``x`` and ``y``.

    Args:
        function: Expression in the variables ``x`` and ``y`` in any form
            accepted by ``sympy.sympify``.

    Returns:
        A 2-tuple ``(d/dx, d/dy)`` of SymPy expressions.
    """
    parsed = sympy.sympify(function)
    sym_x, sym_y = sympy.symbols('x y')
    return tuple(sympy.diff(parsed, axis) for axis in (sym_x, sym_y))


def get_hessian_2d(function):
    """Return the four symbolic second-order partials of ``function``.

    Args:
        function: Expression in the variables ``x`` and ``y`` in any form
            accepted by ``sympy.sympify``.

    Returns:
        A 4-tuple ``(f_xx, f_xy, f_yx, f_yy)`` of SymPy expressions, i.e. the
        Hessian entries in row-major order.  The two mixed partials are
        computed independently (x-then-y and y-then-x).
    """
    sym_x, sym_y = sympy.symbols('x y')
    parsed = sympy.sympify(function)

    def second_partial(*wrt):
        # Arguments are forwarded verbatim to sympy.diff after the expression.
        return sympy.diff(parsed, *wrt)

    return (
        second_partial(sym_x, 2),
        second_partial(sym_x, sym_y),
        second_partial(sym_y, sym_x),
        second_partial(sym_y, 2),
    )


def get_optimizer_trajectory_1d(function, initial_x, optimiser_type, learning_rate, momentum, num_steps):
    """Dispatch to the 1-D trajectory routine selected by ``optimiser_type``.

    Args:
        function: Expression in ``x`` describing the objective.
        initial_x: Starting point of the trajectory.
        optimiser_type: ``"Gradient Descent"`` or ``"Newton"``.
        learning_rate: Step size; only used by gradient descent.
        momentum: Momentum coefficient; only used by gradient descent.
        num_steps: Number of optimisation steps to take.

    Returns:
        Whatever the selected routine returns: a pair
        ``(trajectory_x, trajectory_y)`` of arrays.

    Raises:
        ValueError: If ``optimiser_type`` is not one of the supported names.
    """
    if optimiser_type == "Gradient Descent":
        return get_gd_trajectory_1d(function, initial_x, learning_rate, momentum, num_steps)

    if optimiser_type == "Newton":
        # Newton's method needs neither a learning rate nor momentum.
        return get_newton_trajectory_1d(function, initial_x, num_steps)

    raise ValueError(f"Unsupported optimiser type: {optimiser_type}")


def get_gd_trajectory_1d(function, initial_x, learning_rate, momentum, num_steps):
    """Trace gradient descent with momentum on a 1-D function.

    Args:
        function: Expression string in ``x``; it is differentiated through
            ``get_gradient_1d`` and evaluated numerically with ``numexpr``.
        initial_x: Starting point.
        learning_rate: Multiplier applied to the gradient in each step.
        momentum: Multiplier applied to the previous displacement
            ``x[i] - x[i - 1]``; no momentum is applied on the first step.
        num_steps: Number of update steps.

    Returns:
        ``(trajectory_x, trajectory_y)``: two arrays of length
        ``num_steps + 1`` holding the iterates and the function values there.
    """
    derivative = get_gradient_1d(function)

    def objective(point):
        return numexpr.evaluate(function, local_dict={'x': point})

    n_points = num_steps + 1
    trajectory_x = np.zeros(n_points)
    trajectory_y = np.zeros(n_points)
    trajectory_x[0] = initial_x
    trajectory_y[0] = objective(initial_x)

    for nxt in range(1, n_points):
        cur = nxt - 1
        slope = float(derivative.evalf(subs={'x': trajectory_x[cur]}))

        # There is no previous displacement to reuse on the very first step.
        if cur == 0:
            inertia = 0
        else:
            inertia = momentum * (trajectory_x[cur] - trajectory_x[cur - 1])

        trajectory_x[nxt] = trajectory_x[cur] - learning_rate * slope + inertia
        trajectory_y[nxt] = objective(trajectory_x[nxt])

    return trajectory_x, trajectory_y


def get_newton_trajectory_1d(function, initial_x, num_steps):
    """Trace Newton's method on a 1-D function.

    Each step moves to ``x - f'(x) / f''(x)``.

    Args:
        function: Expression string in ``x``; it is differentiated through
            ``get_gradient_1d`` / ``get_hessian_1d`` and evaluated numerically
            with ``numexpr``.
        initial_x: Starting point.
        num_steps: Number of Newton steps.

    Returns:
        ``(trajectory_x, trajectory_y)``: two arrays of length
        ``num_steps + 1`` holding the iterates and the function values there.
        If the second derivative is exactly zero at some iterate, the Newton
        step is undefined; iteration stops and every remaining entry repeats
        the last valid iterate and its function value.
    """
    grad_x = get_gradient_1d(function)
    hess_x = get_hessian_1d(function)

    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = numexpr.evaluate(function, local_dict={'x': initial_x})

    for i in range(num_steps):
        grad_x_val = float(grad_x.evalf(subs={'x': trajectory_x[i]}))
        hess_x_val = float(hess_x.evalf(subs={'x': trajectory_x[i]}))

        if hess_x_val == 0:
            # Hold position for the rest of the trajectory.  Leaving the tail
            # at its np.zeros initial value would make the path appear to
            # jump to (0, 0), which is not a point the optimiser visited.
            trajectory_x[i + 1:] = trajectory_x[i]
            trajectory_y[i + 1:] = trajectory_y[i]
            break

        trajectory_x[i + 1] = trajectory_x[i] - grad_x_val / hess_x_val
        trajectory_y[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1]})

    return trajectory_x, trajectory_y


def get_gd_trajectory_2d(function, initial_x, initial_y, learning_rate, momentum, num_steps):
    """Trace gradient descent with momentum on a 2-D function.

    Args:
        function: Expression string in ``x`` and ``y``; it is differentiated
            through ``get_gradient_2d`` and evaluated numerically with
            ``numexpr``.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Multiplier applied to the gradient in each step.
        momentum: Multiplier applied to the previous displacement along each
            axis; no momentum is applied on the first step.
        num_steps: Number of update steps.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` with the iterates and the function values there.
    """
    partial_x, partial_y = get_gradient_2d(function)

    def height(px, py):
        return numexpr.evaluate(function, local_dict={'x': px, 'y': py})

    n_points = num_steps + 1
    trajectory_x = np.zeros(n_points)
    trajectory_y = np.zeros(n_points)
    trajectory_z = np.zeros(n_points)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = height(initial_x, initial_y)

    def coords(idx):
        # Fresh substitution mapping for the iterate stored at ``idx``.
        return {'x': trajectory_x[idx], 'y': trajectory_y[idx]}

    for nxt in range(1, n_points):
        cur = nxt - 1
        slope_x = float(partial_x.evalf(subs=coords(cur)))
        slope_y = float(partial_y.evalf(subs=coords(cur)))

        # There is no previous displacement to reuse on the very first step.
        if cur == 0:
            inertia_x = inertia_y = 0
        else:
            inertia_x = momentum * (trajectory_x[cur] - trajectory_x[cur - 1])
            inertia_y = momentum * (trajectory_y[cur] - trajectory_y[cur - 1])

        trajectory_x[nxt] = trajectory_x[cur] - learning_rate * slope_x + inertia_x
        trajectory_y[nxt] = trajectory_y[cur] - learning_rate * slope_y + inertia_y
        trajectory_z[nxt] = height(trajectory_x[nxt], trajectory_y[nxt])

    return trajectory_x, trajectory_y, trajectory_z


def get_nesterov_trajectory_2d(function, initial_x, initial_y, learning_rate, momentum, num_steps):
    """Trace Nesterov accelerated gradient descent on a 2-D function.

    Each step first moves to a look-ahead point
    ``p[i] + momentum * (p[i] - p[i - 1])``, evaluates the gradient there and
    then takes the gradient step *from that look-ahead point*:

        p[i + 1] = look_ahead - learning_rate * grad(look_ahead)

    Args:
        function: Expression string in ``x`` and ``y``; it is differentiated
            through ``get_gradient_2d`` and evaluated numerically with
            ``numexpr``.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Multiplier applied to the gradient in each step.
        momentum: Multiplier applied to the previous displacement along each
            axis; no momentum is applied on the first step.
        num_steps: Number of update steps.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` with the iterates and the function values there.
    """
    grad_x, grad_y = get_gradient_2d(function)

    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    for i in range(num_steps):
        if i == 0:
            # No previous displacement exists yet.
            momentum_x = 0
            momentum_y = 0
        else:
            momentum_x = momentum * (trajectory_x[i] - trajectory_x[i - 1])
            momentum_y = momentum * (trajectory_y[i] - trajectory_y[i - 1])

        lookahead_x = trajectory_x[i] + momentum_x
        lookahead_y = trajectory_y[i] + momentum_y
        lookahead = {'x': lookahead_x, 'y': lookahead_y}
        grad_x_val = float(grad_x.evalf(subs=lookahead))
        grad_y_val = float(grad_y.evalf(subs=lookahead))

        # Step from the look-ahead point, not from trajectory[i]; otherwise
        # the momentum displacement is never applied to the iterate and only
        # shifts where the gradient is sampled.
        trajectory_x[i + 1] = lookahead_x - learning_rate * grad_x_val
        trajectory_y[i + 1] = lookahead_y - learning_rate * grad_y_val
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})

    return trajectory_x, trajectory_y, trajectory_z


def get_adam_trajectory_2d(function, initial_x, initial_y, learning_rate, rho1, rho2, epsilon, num_steps):
    """Trace the Adam optimiser on a 2-D function.

    Args:
        function: Expression string in ``x`` and ``y``; it is differentiated
            through ``get_gradient_2d`` and evaluated numerically with
            ``numexpr``.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Step-size multiplier.
        rho1: Decay rate of the running mean of the gradient (first moment).
        rho2: Decay rate of the running mean of the squared gradient
            (second moment).
        epsilon: Stabiliser added to the bias-corrected second moment inside
            the square root.  ``None`` selects ``1e-8``.
        num_steps: Number of update steps.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` with the iterates and the function values there.
    """
    grad_x, grad_y = get_gradient_2d(function)

    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    m_x, m_y = 0, 0
    v_x, v_y = 0, 0
    # Honour the caller's epsilon.  It used to be overwritten unconditionally
    # with 1e-8, which is kept only as the fallback for a missing value.
    if epsilon is None:
        epsilon = 1e-8

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        grad_x_val = float(grad_x.evalf(subs=point))
        grad_y_val = float(grad_y.evalf(subs=point))

        # Exponential moving averages of the gradient and its square.
        m_x = rho1 * m_x + (1 - rho1) * grad_x_val
        m_y = rho1 * m_y + (1 - rho1) * grad_y_val

        v_x = rho2 * v_x + (1 - rho2) * (grad_x_val ** 2)
        v_y = rho2 * v_y + (1 - rho2) * (grad_y_val ** 2)

        # Bias correction: both averages start at zero, so early values are
        # rescaled by 1 / (1 - rho ** t) with t = i + 1.
        first_correction = 1 - rho1 ** (i + 1)
        second_correction = 1 - rho2 ** (i + 1)

        m_hat_x = m_x / first_correction
        m_hat_y = m_y / first_correction

        v_hat_x = v_x / second_correction
        v_hat_y = v_y / second_correction

        trajectory_x[i + 1] = trajectory_x[i] - learning_rate * m_hat_x / np.sqrt(v_hat_x + epsilon)
        trajectory_y[i + 1] = trajectory_y[i] - learning_rate * m_hat_y / np.sqrt(v_hat_y + epsilon)
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})

    return trajectory_x, trajectory_y, trajectory_z


def get_newton_trajectory_2d(function, initial_x, initial_y, num_steps):
    """Trace Newton's method on a 2-D function.

    Each step solves ``H(p) @ step = grad(p)`` for ``step`` and moves to
    ``p - step``, where ``H`` is the 2x2 Hessian.

    Args:
        function: Expression string in ``x`` and ``y``; it is differentiated
            through ``get_gradient_2d`` / ``get_hessian_2d`` and evaluated
            numerically with ``numexpr``.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        num_steps: Number of Newton steps.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` with the iterates and the function values there.
        If the Hessian is singular at some iterate, the Newton step is
        undefined; iteration stops and every remaining entry repeats the last
        valid iterate and its function value.
    """
    grad_x, grad_y = get_gradient_2d(function)
    hess_xx, hess_xy, hess_yx, hess_yy = get_hessian_2d(function)

    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}

        gradient_vector = np.array(
            [
                float(grad_x.evalf(subs=point)),
                float(grad_y.evalf(subs=point)),
            ],
        )
        hessian_matrix = np.array(
            [
                [float(hess_xx.evalf(subs=point)), float(hess_xy.evalf(subs=point))],
                [float(hess_yx.evalf(subs=point)), float(hess_yy.evalf(subs=point))],
            ],
        )

        try:
            # Solve the linear system directly instead of forming the
            # explicit inverse and multiplying.
            step = np.linalg.solve(hessian_matrix, gradient_vector)
        except np.linalg.LinAlgError:
            # Singular Hessian: hold position for the rest of the trajectory.
            # Leaving the tail at its np.zeros initial value would make the
            # path appear to jump to the origin, which was never visited.
            trajectory_x[i + 1:] = trajectory_x[i]
            trajectory_y[i + 1:] = trajectory_y[i]
            trajectory_z[i + 1:] = trajectory_z[i]
            break

        trajectory_x[i + 1] = trajectory_x[i] - step[0]
        trajectory_y[i + 1] = trajectory_y[i] - step[1]
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})

    return trajectory_x, trajectory_y, trajectory_z


def get_adagrad_trajectory_2d(function, initial_x, initial_y, learning_rate, epsilon, num_steps):
    """Trace the AdaGrad optimiser on a 2-D function.

    Along each axis the step size is ``learning_rate`` divided by the square
    root of the running sum of squared gradients plus ``epsilon``.

    Args:
        function: Expression string in ``x`` and ``y``; it is differentiated
            through ``get_gradient_2d`` and evaluated numerically with
            ``numexpr``.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Base step size.
        epsilon: Stabiliser added to the accumulated squared gradient inside
            the square root.
        num_steps: Number of update steps.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` with the iterates and the function values there.
    """
    partial_x, partial_y = get_gradient_2d(function)

    def height(px, py):
        return numexpr.evaluate(function, local_dict={'x': px, 'y': py})

    n_points = num_steps + 1
    trajectory_x = np.zeros(n_points)
    trajectory_y = np.zeros(n_points)
    trajectory_z = np.zeros(n_points)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = height(initial_x, initial_y)

    def coords(idx):
        # Fresh substitution mapping for the iterate stored at ``idx``.
        return {'x': trajectory_x[idx], 'y': trajectory_y[idx]}

    # Running sums of squared gradients, one per axis.
    sq_sum_x = 0
    sq_sum_y = 0

    for nxt in range(1, n_points):
        cur = nxt - 1
        slope_x = float(partial_x.evalf(subs=coords(cur)))
        slope_y = float(partial_y.evalf(subs=coords(cur)))

        sq_sum_x = sq_sum_x + slope_x ** 2
        sq_sum_y = sq_sum_y + slope_y ** 2

        rate_x = learning_rate / np.sqrt(sq_sum_x + epsilon)
        rate_y = learning_rate / np.sqrt(sq_sum_y + epsilon)

        trajectory_x[nxt] = trajectory_x[cur] - rate_x * slope_x
        trajectory_y[nxt] = trajectory_y[cur] - rate_y * slope_y
        trajectory_z[nxt] = height(trajectory_x[nxt], trajectory_y[nxt])

    return trajectory_x, trajectory_y, trajectory_z


def get_rmsprop_trajectory_2d(function, initial_x, initial_y, learning_rate, rho, epsilon, num_steps):
    """Trace the RMSProp optimiser on a 2-D function.

    Along each axis the step size is ``learning_rate`` divided by the square
    root of an exponential moving average of squared gradients plus
    ``epsilon``.

    Args:
        function: Expression string in ``x`` and ``y``; it is differentiated
            through ``get_gradient_2d`` and evaluated numerically with
            ``numexpr``.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Base step size.
        rho: Decay rate of the moving average of squared gradients.
        epsilon: Stabiliser added to the moving average inside the square
            root.
        num_steps: Number of update steps.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` with the iterates and the function values there.
    """
    partial_x, partial_y = get_gradient_2d(function)

    def height(px, py):
        return numexpr.evaluate(function, local_dict={'x': px, 'y': py})

    n_points = num_steps + 1
    trajectory_x = np.zeros(n_points)
    trajectory_y = np.zeros(n_points)
    trajectory_z = np.zeros(n_points)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = height(initial_x, initial_y)

    def coords(idx):
        # Fresh substitution mapping for the iterate stored at ``idx``.
        return {'x': trajectory_x[idx], 'y': trajectory_y[idx]}

    # Moving averages of squared gradients, one per axis.
    avg_sq_x = 0
    avg_sq_y = 0

    for nxt in range(1, n_points):
        cur = nxt - 1
        slope_x = float(partial_x.evalf(subs=coords(cur)))
        slope_y = float(partial_y.evalf(subs=coords(cur)))

        avg_sq_x = rho * avg_sq_x + (1 - rho) * (slope_x ** 2)
        avg_sq_y = rho * avg_sq_y + (1 - rho) * (slope_y ** 2)

        rate_x = learning_rate / np.sqrt(avg_sq_x + epsilon)
        rate_y = learning_rate / np.sqrt(avg_sq_y + epsilon)

        trajectory_x[nxt] = trajectory_x[cur] - rate_x * slope_x
        trajectory_y[nxt] = trajectory_y[cur] - rate_y * slope_y
        trajectory_z[nxt] = height(trajectory_x[nxt], trajectory_y[nxt])

    return trajectory_x, trajectory_y, trajectory_z


def get_adadelta_trajectory_2d(function, initial_x, initial_y, learning_rate, rho, epsilon, num_steps):
    """Trace the AdaDelta optimiser on a 2-D function.

    Two exponential moving averages are kept per axis: one of squared
    gradients and one of squared updates.  The update is the gradient scaled
    by the ratio of their stabilised square roots, and the iterate moves by
    ``learning_rate`` times that update.

    Args:
        function: Expression string in ``x`` and ``y``; it is differentiated
            through ``get_gradient_2d`` and evaluated numerically with
            ``numexpr``.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Multiplier applied to the computed update.
        rho: Decay rate shared by both moving averages.
        epsilon: Stabiliser added to both averages inside the square roots.
        num_steps: Number of update steps.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` with the iterates and the function values there.
    """
    partial_x, partial_y = get_gradient_2d(function)

    def height(px, py):
        return numexpr.evaluate(function, local_dict={'x': px, 'y': py})

    n_points = num_steps + 1
    trajectory_x = np.zeros(n_points)
    trajectory_y = np.zeros(n_points)
    trajectory_z = np.zeros(n_points)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = height(initial_x, initial_y)

    def coords(idx):
        # Fresh substitution mapping for the iterate stored at ``idx``.
        return {'x': trajectory_x[idx], 'y': trajectory_y[idx]}

    # Moving averages of squared gradients (grad_sq_*) and of squared
    # updates (upd_sq_*), one pair per axis.
    grad_sq_x = 0
    grad_sq_y = 0
    upd_sq_x = 0
    upd_sq_y = 0

    for nxt in range(1, n_points):
        cur = nxt - 1
        slope_x = float(partial_x.evalf(subs=coords(cur)))
        slope_y = float(partial_y.evalf(subs=coords(cur)))

        grad_sq_x = rho * grad_sq_x + (1 - rho) * (slope_x ** 2)
        grad_sq_y = rho * grad_sq_y + (1 - rho) * (slope_y ** 2)

        # The update uses the squared-update average from *before* this step;
        # that average is refreshed only after the update is known.
        update_x = np.sqrt(upd_sq_x + epsilon) / np.sqrt(grad_sq_x + epsilon) * slope_x
        update_y = np.sqrt(upd_sq_y + epsilon) / np.sqrt(grad_sq_y + epsilon) * slope_y

        upd_sq_x = rho * upd_sq_x + (1 - rho) * update_x ** 2
        upd_sq_y = rho * upd_sq_y + (1 - rho) * update_y ** 2

        trajectory_x[nxt] = trajectory_x[cur] - learning_rate * update_x
        trajectory_y[nxt] = trajectory_y[cur] - learning_rate * update_y
        trajectory_z[nxt] = height(trajectory_x[nxt], trajectory_y[nxt])

    return trajectory_x, trajectory_y, trajectory_z