import cv2
import numpy as np
from skimage.feature.texture import graycomatrix, graycoprops
from skimage.feature import local_binary_pattern ,hog
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline 


def rgb_histogram(image, bins=32):
    """Build a colour descriptor from channel histograms and colour moments.

    The output is laid out as follows:

    1. one histogram per channel of the input image (``bins`` values each,
       histogram range 0..256);
    2. one histogram per HSV channel (range 0..180 for the first channel,
       0..256 for the other two);
    3. for each input channel: mean, standard deviation and the cube root
       of the third central moment.

    Every histogram is passed through ``cv2.normalize`` with its default
    settings before being appended.

    Args:
        image: Three-channel image array. The HSV conversion uses
            ``COLOR_RGB2HSV``, so the channels are treated as R, G, B.
        bins: Number of bins used for each histogram.

    Returns:
        1-D NumPy array with ``6 * bins + 9`` entries.
    """
    pixels = image.astype(np.float32)
    # The HSV conversion runs on an 8-bit copy of the float image.
    hsv_pixels = cv2.cvtColor(pixels.astype(np.uint8), cv2.COLOR_RGB2HSV)

    descriptor = []

    def _append_histogram(source, channel, value_range):
        counts = cv2.calcHist([source], [channel], None, [bins], value_range)
        descriptor.extend(cv2.normalize(counts, counts).flatten())

    # Histograms of the input channels.
    for channel in range(3):
        _append_histogram(pixels, channel, [0, 256])

    # Histograms of the HSV channels; the first one uses a 0..180 range.
    for channel, upper in enumerate([180, 256, 256]):
        _append_histogram(hsv_pixels, channel, [0, upper])

    # Colour moments of the input channels.
    for channel in range(3):
        plane = pixels[:, :, channel]
        centre = np.mean(plane)
        spread = np.std(plane)
        third = np.cbrt(np.mean((plane - centre) ** 3))
        descriptor.extend([centre, spread, third])

    return np.array(descriptor)


def hu_moments(image):
    """Return the Hu moments of an image on a signed, clipped log10 scale.

    The image is converted to grayscale with ``COLOR_RGB2GRAY``, its Hu
    moments are computed, and each moment ``h`` is mapped to
    ``-sign(h) * log10(|h| + 1e-10)`` and then clipped to ``[-10, 10]``.

    Args:
        image: Three-channel image array, treated as RGB.

    Returns:
        1-D NumPy array of transformed Hu moments.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    raw = cv2.HuMoments(cv2.moments(grayscale)).flatten()

    # The small offset keeps log10 finite when a moment is exactly zero.
    log_magnitude = np.log10(np.abs(raw) + 1e-10)
    signed_log = -np.sign(raw) * log_magnitude

    # Limit extreme values so noise cannot dominate the descriptor.
    return np.clip(signed_log, -10, 10)


def glcm_features(image, distances=(1, 2), angles=(0, np.pi / 4, np.pi / 2), levels=64):
    """Compute gray-level co-occurrence matrix (GLCM) texture features.

    The image is converted to grayscale, quantized to ``levels`` gray
    levels, and one symmetric, normalized GLCM is built per
    (distance, angle) pair. For every GLCM the contrast, dissimilarity,
    homogeneity, energy and correlation are appended, in that order.

    Args:
        image: Three-channel image array, treated as RGB.
            Assumed to hold 8-bit intensities (0..255) - confirm with callers.
        distances: Pixel pair distances to evaluate.
        angles: Pixel pair angles in radians.
        levels: Number of gray levels after quantization; must be in
            ``1..256``.

    Returns:
        1-D NumPy array with ``len(distances) * len(angles) * 5`` entries,
        ordered by distance, then angle, then property.

    Raises:
        ValueError: If ``levels`` is outside ``1..256``.
    """
    if not 1 <= levels <= 256:
        raise ValueError(f"levels must be between 1 and 256, got {levels}")

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Rescale 0..255 onto 0..levels-1. For levels that divide 256 this equals
    # gray // (256 // levels); for any other value it still stays below
    # `levels`, which the integer-divisor form does not guarantee.
    # Widen first: multiplying in uint8 would wrap around.
    quantized = (gray.astype(np.uint32) * levels // 256).astype(np.uint8)

    props = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')
    features = []

    for d in distances:
        for a in angles:
            glcm = graycomatrix(quantized, distances=[d], angles=[a],
                                levels=levels, symmetric=True, normed=True)
            for p in props:
                features.extend(graycoprops(glcm, p).flatten())

    return np.array(features)


def local_binary_pattern_features(image, P=8, R=1):
    """Return a density histogram of uniform local binary pattern codes.

    Args:
        image: Three-channel image array, treated as RGB.
        P: Number of neighbour points passed to ``local_binary_pattern``.
        R: Neighbourhood radius passed to ``local_binary_pattern``.

    Returns:
        1-D NumPy array with ``P + 2`` entries: the density-normalized
        histogram over unit-width bins with edges ``0, 1, ..., P + 2``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    codes = local_binary_pattern(grayscale, P, R, method='uniform')

    bin_edges = np.arange(0, P + 3)
    densities = np.histogram(
        codes.ravel(),
        bins=bin_edges,
        range=(0, P + 2),
        density=True,
    )[0]
    return densities


#  Edge Density (Canny-based)
def edge_density(image, low_threshold=50, high_threshold=150):
    """Return the fraction of pixels that the Canny detector marks as edges.

    Args:
        image: Three-channel image array, treated as RGB.
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        NumPy array of shape ``(1,)`` holding the edge-pixel ratio.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edge_map = cv2.Canny(grayscale, low_threshold, high_threshold)

    # Any non-zero pixel in the edge map counts as an edge.
    edge_pixels = np.count_nonzero(edge_map)
    return np.array([edge_pixels / edge_map.size])


def hog_features(image, pixels_per_cell=(16, 16), cells_per_block=(2, 2), orientations=9):
    """Compute a HOG descriptor on a fixed-size grayscale copy of the image.

    The image is resized to 128x128 and converted to grayscale so that the
    descriptor length does not depend on the input resolution.

    Args:
        image: Three-channel image array, treated as RGB.
        pixels_per_cell: Cell size forwarded to ``skimage.feature.hog``.
        cells_per_block: Block size (in cells) forwarded to ``hog``.
        orientations: Number of orientation bins forwarded to ``hog``.

    Returns:
        The flattened HOG feature vector returned by ``hog``.
    """
    hog_options = dict(
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2-Hys',
        transform_sqrt=True,
        feature_vector=True,
    )
    patch = cv2.cvtColor(cv2.resize(image, (128, 128)), cv2.COLOR_RGB2GRAY)
    return hog(patch, **hog_options)


def extract_features_from_image(image):
    """Concatenate all per-image descriptors into one feature vector.

    The descriptors are computed and joined in this order: colour
    histograms/moments, Hu moments, GLCM texture, LBP histogram, edge
    density, HOG.

    Args:
        image: Image array accepted by every extractor listed above.

    Returns:
        1-D NumPy array containing all descriptors back to back.
    """
    extractors = (
        rgb_histogram,
        hu_moments,
        glcm_features,
        local_binary_pattern_features,
        edge_density,
        hog_features,
    )
    return np.concatenate([extract(image) for extract in extractors])

def perform_pca(data, num_components):
    """Standardize ``data`` and project it onto its principal components.

    NaN and infinite entries are replaced by 0 first. The scaler and the
    PCA are both fitted on the matrix passed in, so this should be called
    on training data only if it feeds a model that is evaluated on a
    held-out set.

    Args:
        data: 2-D array-like of shape ``(n_samples, n_features)``.
        num_components: Requested number of components. It is capped at
            ``min(n_samples, n_features)``, the most PCA can produce.

    Returns:
        NumPy array with the projected data, one row per sample.
    """
    # Clean data
    data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)

    # Standardize
    data_standardized = StandardScaler().fit_transform(data)

    # PCA rejects n_components larger than either dimension of the data,
    # so the cap must consider the sample count as well as the feature count.
    n_samples, n_features = data.shape
    k = min(num_components, n_samples, n_features)
    pca = PCA(n_components=k)
    data_reduced = pca.fit_transform(data_standardized)

    print(f"PCA: Reduced from {n_features} to {pca.n_components_} components")
    print(f"Explained variance: {np.sum(pca.explained_variance_ratio_):.4f}")

    return data_reduced

def train_svm_model(features, labels,
                    test_size=0.2,
                    random_state=42,
                    use_selectkbest=True,
                    k_best=500,
                    n_pca_components=100,
                    do_gridsearch=False):
    """Train and evaluate a scaler -> (SelectKBest) -> PCA -> linear SVC pipeline.

    The data is split with a stratified train/test split, the pipeline is
    fitted on the training part (optionally through a 5-fold grid search),
    and accuracy, a classification report and the confusion matrix for the
    test part are printed.

    Args:
        features: 2-D array-like of shape ``(n_samples, n_features)``.
        labels: Class labels, either 1-D or one-hot encoded (2-D with more
            than one column, converted with ``argmax``).
        test_size: Fraction of the data held out for testing.
        random_state: Seed used for the split, the CV folds and the SVC.
        use_selectkbest: Whether to insert a ``SelectKBest(f_classif)`` step.
        k_best: Number of features to keep in that step (capped at the
            number of available features).
        n_pca_components: Requested PCA components (capped at what the
            preceding step and the training sample count allow).
        do_gridsearch: Whether to tune ``select__k``, ``pca__n_components``
            and ``svc__C`` with ``GridSearchCV``.

    Returns:
        A 3-tuple ``(pipeline, (X_test, y_test, y_pred), grid_search)``:
        the fitted pipeline (the best estimator when grid search is used),
        the held-out data with its predictions, and the fitted
        ``GridSearchCV`` object or ``None``.
    """
    # np.ndim / np.shape also work for plain Python lists.
    if np.ndim(labels) > 1 and np.shape(labels)[1] > 1:
        labels = np.argmax(labels, axis=1)

    # stratified split
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state, stratify=labels)

    n_train, n_features = np.shape(X_train)
    k_selected = min(k_best, n_features) if use_selectkbest else n_features
    # PCA cannot return more components than it receives features (after
    # SelectKBest) or than there are training samples.
    n_components = min(n_pca_components, k_selected, n_train)

    # build pipeline steps
    steps = [('scaler', StandardScaler())]
    if use_selectkbest:
        steps.append(('select', SelectKBest(score_func=f_classif, k=k_selected)))
    steps.append(('pca', PCA(n_components=n_components)))
    steps.append(('svc', SVC(kernel='linear', probability=True,
                             class_weight='balanced', random_state=random_state)))
    pipeline = Pipeline(steps)

    grid_search = None
    if do_gridsearch:
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

        # During cross-validation PCA only sees one training fold, so the
        # component candidates must fit the smallest fold as well.
        smallest_fold = min(len(train_idx) for train_idx, _ in cv.split(X_train, y_train))
        pca_limit = min(n_features, smallest_fold)

        param_grid = {'svc__C': [0.1, 1, 5, 10]}
        if use_selectkbest:
            # Clamp to the available features and drop duplicates created by
            # clamping, so the same configuration is not fitted repeatedly.
            select_candidates = sorted({min(c, n_features) for c in (200, 500, 1000)})
            param_grid['select__k'] = select_candidates
            # Every PCA candidate must be valid for every select__k value.
            pca_limit = min(pca_limit, select_candidates[0])
        param_grid['pca__n_components'] = sorted({min(c, pca_limit) for c in (50, 100, 200)})

        grid_search = GridSearchCV(pipeline, param_grid, cv=cv, n_jobs=-1,
                                   scoring='accuracy', verbose=2)
        grid_search.fit(X_train, y_train)
        pipeline = grid_search.best_estimator_
    else:
        pipeline.fit(X_train, y_train)

    # Evaluate
    y_pred = pipeline.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

    return pipeline, (X_test, y_test, y_pred), grid_search