Upload AI_Image_Classification.ipynb
AI_Image_Classification.ipynb  ADDED  (+856 -0)
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "private_outputs": true,
      "provenance": [],
      "machine_shape": "hm"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "CSC6_ShCp6h9"
      },
      "outputs": [],
      "source": [
        "!unzip AI.zip\n",
        "!unzip Photo.zip"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install umap-learn\n",
        "!pip install PyWavelets"
      ],
      "metadata": {
        "id": "N6CWTCziLMbf"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from sklearn.decomposition import PCA\n",
        "import umap\n",
        "import pywt"
      ],
      "metadata": {
        "id": "53ZvG8NbATlR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Create a function to load all the files in a folder as images.\n",
        "\n",
        "import os\n",
        "from PIL import Image\n",
        "def load_images_from_folder(folder):\n",
        "    images = []\n",
        "    labels = []\n",
        "    for filename in os.listdir(folder):\n",
        "        if not filename.endswith(('.jpg', '.png', '.jpeg', '.webp')):\n",
        "            continue\n",
        "        img = Image.open(os.path.join(folder, filename))\n",
        "        img = img.resize((512, 512))\n",
        "        images.append(img)\n",
        "        # Label 1 for AI-generated images, 0 for photos. Substring check so\n",
        "        # nested paths such as 'Validation/AI' are labelled correctly too.\n",
        "        labels.append(1 if \"AI\" in folder else 0)\n",
        "    return images, labels"
      ],
      "metadata": {
        "id": "BH6bOWUXsi_D"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Can you write a function that can implement the discrete wavelet transform and display the wavelets given in an array for the image? The function should take in an image_path and a list of wavelets and perform the dwt and display the wavelets.\n",
        "\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "def apply_wavelet_transform_and_display_multiple(image_path, wavelets):\n",
        "    # Load the image as grayscale\n",
        "    img = Image.open(image_path).convert('L')\n",
        "\n",
        "    # Convert image to numpy array\n",
        "    img_array = np.array(img)\n",
        "\n",
        "    num_wavelets = len(wavelets)\n",
        "    fig, axes = plt.subplots(1, num_wavelets + 1, figsize=(5 * (num_wavelets + 1), 5))\n",
        "\n",
        "    # Display the original image\n",
        "    axes[0].imshow(img_array, cmap='gray')\n",
        "    axes[0].set_title('Original Image')\n",
        "\n",
        "    # Apply the DWT and display the detail coefficients for each wavelet\n",
        "    for i, wavelet in enumerate(wavelets):\n",
        "        cA, cD = pywt.dwt(img_array, wavelet)\n",
        "        axes[i + 1].imshow(cD, cmap='gray')\n",
        "        axes[i + 1].set_title(f'Detail Coefficients ({wavelet})')\n",
        "\n",
        "    plt.tight_layout()\n",
        "    plt.show()\n"
      ],
      "metadata": {
        "id": "sBRFYk0C2nfX"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "apply_wavelet_transform_and_display_multiple('kiri-in-high-resolution-love-her-3-v0-ezejx6try3va1.webp', ['db1', 'db6', 'db10', 'db12', 'db16'])"
      ],
      "metadata": {
        "id": "KfY3qSfkxJnS"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Can you write a function that given a list of images from PIL can convert them to grayscale and apply a set of wavelets using dwt and then combined them into one feature vector?\n",
        "\n",
        "import numpy as np\n",
        "def extract_wavelet_features(images, wavelets):\n",
        "    all_features = []\n",
        "    for img in images:\n",
        "        img_gray = img.convert('L')\n",
        "        img_array = np.array(img_gray)\n",
        "        features = []\n",
        "        # pywt.dwt runs along the last axis (image rows); the flattened\n",
        "        # detail coefficients of every wavelet are concatenated per image.\n",
        "        for wavelet in wavelets:\n",
        "            cA, cD = pywt.dwt(img_array, wavelet)\n",
        "            features.extend(cD.flatten())\n",
        "        all_features.append(features)\n",
        "    return np.array(all_features)\n"
      ],
      "metadata": {
        "id": "ufMhM7_86IbC"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Apply the Fourier transform to the images from the load_images_from_folder function.\n",
        "# (Note: despite the prompt above, this cell extracts wavelet features, not Fourier ones.)\n",
        "\n",
        "ai_images, ai_labels = load_images_from_folder('AI')\n",
        "photo_images, photo_labels = load_images_from_folder('Photo')\n",
        "\n",
        "# Balance the two classes by truncating both to the smaller one\n",
        "min_length = min(len(ai_images), len(photo_images))\n",
        "ai_images = ai_images[:min_length]\n",
        "photo_images = photo_images[:min_length]\n",
        "ai_labels = ai_labels[:min_length]\n",
        "photo_labels = photo_labels[:min_length]\n",
        "\n",
        "print(f\"Number of AI images: {len(ai_images)}\")\n",
        "print(f\"Number of Photo images: {len(photo_images)}\")\n",
        "images = ai_images + photo_images\n",
        "labels = ai_labels + photo_labels\n",
        "features = extract_wavelet_features(images, [\"db4\", \"db10\"])"
      ],
      "metadata": {
        "id": "7Pfn_0-QswSh"
      },
      "execution_count": null,
      "outputs": []
    },
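    {
      "cell_type": "markdown",
      "source": [
        "*Editor's note:* before reducing dimensionality it is worth checking how large the raw wavelet feature vectors are. The cell below is an editorial sketch, not part of the original run: it prints the `cD` shapes for a dummy 512x512 array (the size produced by `load_images_from_folder` above) and the resulting per-image feature length, which motivates the UMAP step that follows."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Editor's sketch: raw feature size per image, assuming the 512x512\n",
        "# grayscale inputs produced by load_images_from_folder above.\n",
        "import numpy as np\n",
        "import pywt\n",
        "\n",
        "demo = np.zeros((512, 512))\n",
        "total = 0\n",
        "for wavelet in [\"db4\", \"db10\"]:\n",
        "    cA, cD = pywt.dwt(demo, wavelet)  # 1-D DWT along the last axis\n",
        "    print(wavelet, \"cD shape:\", cD.shape)\n",
        "    total += cD.size\n",
        "print(\"feature vector length per image:\", total)"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },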
    {
      "cell_type": "code",
      "source": [
        "reducer = umap.UMAP(n_neighbors=16, n_components=32, random_state=42)\n",
        "embeddings = reducer.fit_transform(features)"
      ],
      "metadata": {
        "id": "xc_1hAuTLdUj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "reducer.embedding_.dtype"
      ],
      "metadata": {
        "id": "qprQSJTCaPpv"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.2, random_state=42)"
      ],
      "metadata": {
        "id": "dFQYuL3MbJLj"
      },
      "execution_count": null,
      "outputs": []
    },
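    {
      "cell_type": "markdown",
      "source": [
        "*Editor's note:* in the cells above, UMAP is fitted on **all** images before the train/test split, so the test embeddings are influenced by test images (a mild form of leakage). A minimal leak-free variant under the same parameters is sketched below; it is an editorial addition, not part of the original experiment, and the `_lf` names are hypothetical."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Editor's sketch: split the raw features first, then fit UMAP only on the\n",
        "# training portion and transform the held-out portion. Variables with a\n",
        "# _lf suffix are hypothetical and local to this sketch.\n",
        "Xf_train, Xf_test, y_train_lf, y_test_lf = train_test_split(\n",
        "    features, labels, test_size=0.2, random_state=42)\n",
        "reducer_lf = umap.UMAP(n_neighbors=16, n_components=32, random_state=42)\n",
        "X_train_lf = reducer_lf.fit_transform(Xf_train)\n",
        "X_test_lf = reducer_lf.transform(Xf_test)"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },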
    {
      "cell_type": "code",
      "source": [
        "from xgboost import XGBClassifier"
      ],
      "metadata": {
        "id": "HoySyJJ4cL3n"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "xgb_clf = XGBClassifier(n_estimators=200, eval_metric=\"logloss\", learning_rate=0.01,\n",
        "                        reg_lambda=0.8, max_depth=5, gamma=1.0, subsample=0.5,\n",
        "                        colsample_bytree=0.5, min_child_weight=10)\n",
        "xgb_clf.fit(X_train, y_train, eval_set=[(X_test, y_test)],\n",
        "            verbose=True)\n",
        "\n",
        "xgb_clf_pred = xgb_clf.predict(X_test)\n",
        "score = xgb_clf.score(X_test, y_test)\n",
        "print(f\"Accuracy: {score}\")\n",
        "\n",
        "print(f\"F1 score: {f1_score(y_test, xgb_clf_pred)}\")"
      ],
      "metadata": {
        "id": "vP5jesFXJHcY"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Calculate the training accuracy\n",
        "\n",
        "xgb_clf_pred_train = xgb_clf.predict(X_train)\n",
        "score = xgb_clf.score(X_train, y_train)\n",
        "print(f\"Training Accuracy: {score}\")\n",
        "\n",
        "score = xgb_clf.score(X_test, y_test)\n",
        "print(f\"Test Accuracy: {score}\")"
      ],
      "metadata": {
        "id": "IljcJVxVVlgI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Can you perform four fold cross validation on the xgboost model?\n",
        "\n",
        "from sklearn.model_selection import cross_val_score, KFold\n",
        "# Perform four-fold cross-validation\n",
        "kfold = KFold(n_splits=4, shuffle=True, random_state=42)\n",
        "scores = cross_val_score(xgb_clf, embeddings, labels, cv=kfold, scoring='accuracy')\n",
        "\n",
        "# Print the cross-validation scores\n",
        "print(\"Cross-validation scores:\", scores)\n",
        "print(\"Average cross-validation score:\", scores.mean())"
      ],
      "metadata": {
        "id": "peofLwk78-mE"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "ConfusionMatrixDisplay.from_estimator(xgb_clf, X_test, y_test)"
      ],
      "metadata": {
        "id": "5GvVgOoXcbJ-"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "xgb_clf.save_model(\"xgb_flux_detection_model.json\")"
      ],
      "metadata": {
        "id": "5TZsByCxQqbU"
      },
      "execution_count": null,
      "outputs": []
    },
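    {
      "cell_type": "markdown",
      "source": [
        "*Editor's note:* the JSON file written above can be loaded back into a fresh booster. The cell below is an editorial sketch of the round trip; it assumes the file from the previous cell is still on disk."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Editor's sketch: reload the saved XGBoost model and re-check test accuracy.\n",
        "xgb_reloaded = XGBClassifier()\n",
        "xgb_reloaded.load_model(\"xgb_flux_detection_model.json\")\n",
        "print(\"Reloaded accuracy:\", xgb_reloaded.score(X_test, y_test))"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },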
    {
      "cell_type": "code",
      "source": [
        "# prompt: A random classifier\n",
        "\n",
        "from sklearn.dummy import DummyClassifier\n",
        "\n",
        "# Initialize a random classifier\n",
        "dummy_clf = DummyClassifier(strategy='uniform')  # Predicts randomly\n",
        "\n",
        "# Fit the classifier (not really necessary for a random classifier)\n",
        "dummy_clf.fit(X_train, y_train)\n",
        "\n",
        "# Make predictions\n",
        "dummy_pred = dummy_clf.predict(X_test)\n",
        "\n",
        "# Evaluate the performance\n",
        "score = dummy_clf.score(X_test, y_test)\n",
        "print(f\"Accuracy: {score}\")\n",
        "print(f\"F1 score: {f1_score(y_test, dummy_pred)}\")\n",
        "\n",
        "ConfusionMatrixDisplay.from_estimator(dummy_clf, X_test, y_test)"
      ],
      "metadata": {
        "id": "X7qkISlS4QjW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: random forests with pruning\n",
        "\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "\n",
        "# Initialize the RandomForestClassifier with pruning parameters\n",
        "rf_clf = RandomForestClassifier(n_estimators=100,     # Number of trees in the forest\n",
        "                                max_depth=5,          # Maximum depth of each tree (pruning)\n",
        "                                min_samples_split=5,  # Minimum samples required to split a node (pruning)\n",
        "                                random_state=42)      # Random seed for reproducibility\n",
        "\n",
        "# Fit the classifier to the training data\n",
        "rf_clf.fit(X_train, y_train)\n",
        "\n",
        "# Make predictions on the test data\n",
        "rf_pred = rf_clf.predict(X_test)\n",
        "\n",
        "# Evaluate the performance\n",
        "score = rf_clf.score(X_test, y_test)\n",
        "print(f\"Accuracy: {score}\")\n",
        "\n",
        "print(f\"F1 score: {f1_score(y_test, rf_pred)}\")\n",
        "\n",
        "ConfusionMatrixDisplay.from_estimator(rf_clf, X_test, y_test)"
      ],
      "metadata": {
        "id": "3qJFLsYT3xmi"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Can you perform four fold cross validation on the rf model?\n",
        "\n",
        "from sklearn.model_selection import cross_val_score, KFold\n",
        "# Perform four-fold cross-validation\n",
        "kfold = KFold(n_splits=4, shuffle=True, random_state=42)\n",
        "scores = cross_val_score(rf_clf, embeddings, labels, cv=kfold, scoring='accuracy')\n",
        "\n",
        "# Print the cross-validation scores\n",
        "print(\"Cross-validation scores:\", scores)\n",
        "print(\"Average cross-validation score:\", scores.mean())"
      ],
      "metadata": {
        "id": "-gDc0KvD9_Yp"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: SVC classifier\n",
        "\n",
        "from sklearn.svm import SVC\n",
        "\n",
        "# Initialize the SVC classifier\n",
        "svc_clf = SVC()\n",
        "\n",
        "# Fit the classifier to the training data\n",
        "svc_clf.fit(X_train, y_train)\n",
        "\n",
        "# Make predictions on the test data\n",
        "svc_pred = svc_clf.predict(X_test)\n",
        "\n",
        "# Evaluate the performance\n",
        "score = svc_clf.score(X_test, y_test)\n",
        "print(f\"Accuracy: {score}\")\n",
        "\n",
        "print(f\"F1 score: {f1_score(y_test, svc_pred)}\")\n",
        "\n",
        "ConfusionMatrixDisplay.from_estimator(svc_clf, X_test, y_test)\n"
      ],
      "metadata": {
        "id": "1sQjrGeZ8Ir3"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: classify with KNN and K=7\n",
        "\n",
        "from sklearn.neighbors import KNeighborsClassifier\n",
        "# Initialize the KNeighborsClassifier with K=7\n",
        "knn_clf = KNeighborsClassifier(n_neighbors=7)\n",
        "\n",
        "# Fit the classifier to the training data\n",
        "knn_clf.fit(X_train, y_train)\n",
        "\n",
        "# Make predictions on the test data\n",
        "knn_pred = knn_clf.predict(X_test)\n",
        "\n",
        "# Evaluate the performance\n",
        "score = knn_clf.score(X_test, y_test)\n",
        "print(f\"Accuracy: {score}\")\n",
        "\n",
        "print(f\"F1 score: {f1_score(y_test, knn_pred)}\")\n",
        "\n",
        "ConfusionMatrixDisplay.from_estimator(knn_clf, X_test, y_test)\n"
      ],
      "metadata": {
        "id": "vU8SRYsZ72Sr"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Can you perform four fold cross validation on the KNN model?\n",
        "\n",
        "from sklearn.model_selection import cross_val_score, KFold\n",
        "# Perform four-fold cross-validation\n",
        "kfold = KFold(n_splits=4, shuffle=True, random_state=42)\n",
        "scores = cross_val_score(knn_clf, embeddings, labels, cv=kfold, scoring='accuracy')\n",
        "\n",
        "# Print the cross-validation scores\n",
        "print(\"Cross-validation scores:\", scores)\n",
        "print(\"Average cross-validation score:\", scores.mean())"
      ],
      "metadata": {
        "id": "1X9_4kAKRlSm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import plotly.express as px\n",
        "import pandas as pd\n",
        "\n",
        "# Use a separate 2-component reducer for plotting so the fitted 32-component\n",
        "# 'reducer' above is not overwritten (it is reused later to transform the\n",
        "# validation features).\n",
        "reducer_2d = umap.UMAP(n_components=2, random_state=42)\n",
        "\n",
        "# Reduce the dimensionality of the features array\n",
        "embedding_2d = reducer_2d.fit_transform(features)\n",
        "\n",
        "# Create a DataFrame for Plotly\n",
        "embedding_df = pd.DataFrame(embedding_2d, columns=['UMAP1', 'UMAP2'])\n",
        "embedding_df['label'] = labels\n",
        "# Create a scatter plot\n",
        "fig = px.scatter(\n",
        "    embedding_df,\n",
        "    x='UMAP1',\n",
        "    y='UMAP2',\n",
        "    color='label',\n",
        "    title='UMAP Dimensionality Reduction',\n",
        "    labels={'color': 'Label'}\n",
        ")\n",
        "\n",
        "# Show the plot\n",
        "fig.show()"
      ],
      "metadata": {
        "id": "wMEQoDF2Goj-"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Save the knn classifier as a file\n",
        "\n",
        "import joblib\n",
        "\n",
        "# Save the knn classifier to a file\n",
        "filename = 'knn_model.pkl'\n",
        "joblib.dump(knn_clf, filename)\n"
      ],
      "metadata": {
        "id": "I-Myacr4zsVy"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: load the knn model\n",
        "\n",
        "# Load the knn classifier from the file\n",
        "filename = 'knn_model.pkl'\n",
        "loaded_knn_clf = joblib.load(filename)"
      ],
      "metadata": {
        "id": "yayMkQELAbZO"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: load the validation images and apply the wavelet transforms\n",
        "\n",
        "# Assuming 'validation_folder' contains your validation images\n",
        "validation_images, validation_labels = load_images_from_folder('validation_folder')\n",
        "\n",
        "# Extract wavelet features from validation images\n",
        "validation_features = extract_wavelet_features(validation_images, [\"db4\", \"db10\"])\n",
        "\n",
        "# Reduce dimensionality of validation features using the same UMAP reducer\n",
        "validation_embeddings = reducer.transform(validation_features)\n",
        "\n",
        "# Now you have 'validation_embeddings' and 'validation_labels' for further use\n",
        "# (e.g., evaluating your trained models on validation data)\n"
      ],
      "metadata": {
        "id": "GKCz35S8E9jn"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Validation"
      ],
      "metadata": {
        "id": "nrcTRu_ilEGk"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!unzip Validation.zip"
      ],
      "metadata": {
        "id": "Yajcb-E5lDgl"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: load the validation images\n",
        "\n",
        "# Assuming 'Validation' is the folder containing your validation images\n",
        "ai_validation_images, ai_validation_labels = load_images_from_folder('Validation/AI')\n",
        "photo_validation_images, photo_validation_labels = load_images_from_folder('Validation/Photo')\n",
        "\n",
        "# Now you have the validation images and labels for further use\n",
        "print(f\"Number of AI Validation images: {len(ai_validation_images)}\")\n",
        "print(f\"Number of Photo Validation images: {len(photo_validation_images)}\")"
      ],
      "metadata": {
        "id": "mS8hzT-TlGER"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: Combine both validation datasets and extract the wavelet features.\n",
        "\n",
        "# Combine validation datasets\n",
        "validation_images = ai_validation_images + photo_validation_images\n",
        "validation_labels = ai_validation_labels + photo_validation_labels\n",
        "\n",
        "# Extract wavelet features from validation images\n",
        "validation_features = extract_wavelet_features(validation_images, [\"db4\", \"db10\"])"
      ],
      "metadata": {
        "id": "iTeZUqEblbu1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: apply the reducer to find the validation embeddings\n",
        "\n",
        "# Reduce dimensionality of validation features using the same UMAP reducer\n",
        "validation_embeddings = reducer.transform(validation_features)"
      ],
      "metadata": {
        "id": "jdUbmE4Hltng"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# prompt: find the accuracy and f1 score on the knn classifier for validation features\n",
        "\n",
        "# Make predictions on the validation data\n",
        "knn_pred_validation = knn_clf.predict(validation_embeddings)\n",
        "\n",
        "# Evaluate the performance on validation data\n",
        "score_validation = knn_clf.score(validation_embeddings, validation_labels)\n",
        "print(f\"Validation Accuracy: {score_validation}\")\n",
        "\n",
        "print(f\"Validation F1 score: {f1_score(validation_labels, knn_pred_validation)}\")\n"
      ],
      "metadata": {
        "id": "ls2ij5VxlyOX"
      },
      "execution_count": null,
      "outputs": []
    },
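    {
      "cell_type": "markdown",
      "source": [
        "*Editor's note:* a confusion matrix over the validation predictions makes the error pattern easier to read than accuracy and F1 alone. The cell below is an editorial addition using the `validation_labels` and `knn_pred_validation` computed above."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Editor's sketch: confusion matrix for the validation predictions above.\n",
        "ConfusionMatrixDisplay.from_predictions(validation_labels, knn_pred_validation)"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },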
    {
      "cell_type": "code",
      "source": [
        "# prompt: Can you combine the entire pipeline into one class?\n",
        "\n",
        "from sklearn.model_selection import train_test_split, cross_val_score, KFold\n",
        "from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, classification_report\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from sklearn.decomposition import PCA\n",
        "import umap\n",
        "import pywt\n",
        "import os\n",
        "from PIL import Image\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "from xgboost import XGBClassifier\n",
        "from sklearn.dummy import DummyClassifier\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "from sklearn.svm import SVC\n",
        "from sklearn.neighbors import KNeighborsClassifier\n",
        "import plotly.express as px\n",
        "import pandas as pd\n",
        "import joblib\n",
        "from tqdm import tqdm\n",
        "\n",
        "class FluxClassifier:\n",
        "    def __init__(self, wavelets=[\"db4\", \"db10\"], umap_n_neighbors=16, umap_n_components=32, random_state=42):\n",
        "        self.wavelets = wavelets\n",
        "        self.umap_n_neighbors = umap_n_neighbors\n",
        "        self.umap_n_components = umap_n_components\n",
        "        self.random_state = random_state\n",
        "        self.reducer = umap.UMAP(n_neighbors=self.umap_n_neighbors,\n",
        "                                 n_components=self.umap_n_components,\n",
        "                                 random_state=self.random_state)\n",
        "        self.classifier = KNeighborsClassifier(n_neighbors=7)  # Default classifier\n",
        "\n",
        "    def load_images_from_folder(self, folder):\n",
        "        images = []\n",
        "        labels = []\n",
        "        print(f\"Loading images from {folder}\")\n",
        "        for filename in tqdm(os.listdir(folder)):\n",
        "            if not (filename.endswith('.jpg') or filename.endswith('.png') or\n",
        "                    filename.endswith('jpeg') or filename.endswith('webp')):\n",
        "                continue\n",
        "            img = Image.open(os.path.join(folder, filename))\n",
        "            img = img.resize((512, 512))\n",
        "            images.append(img)\n",
        "            labels.append(1 if \"AI\" in folder else 0)  # Folder names are assumed to contain \"AI\" or not\n",
        "        return images, labels\n",
        "\n",
        "    def extract_wavelet_features(self, images):\n",
        "        all_features = []\n",
        "        for img in images:\n",
        "            img_gray = img.convert('L')\n",
        "            img_array = np.array(img_gray)\n",
        "            features = []\n",
        "            for wavelet in self.wavelets:\n",
        "                cA, cD = pywt.dwt(img_array, wavelet)\n",
        "                features.extend(cD.flatten())\n",
        "            all_features.append(features)\n",
        "        return np.array(all_features)\n",
        "\n",
        "    def fit(self, train_folder1, train_folder2):\n",
        "        # Load images and extract features\n",
        "        images1, labels1 = self.load_images_from_folder(train_folder1)\n",
        "        images2, labels2 = self.load_images_from_folder(train_folder2)\n",
        "\n",
        "        # Balance the two classes by truncating both to the smaller one\n",
        "        min_length = min(len(images1), len(images2))\n",
        "        images1 = images1[:min_length]\n",
        "        images2 = images2[:min_length]\n",
        "        labels1 = labels1[:min_length]\n",
        "        labels2 = labels2[:min_length]\n",
        "\n",
        "        images = images1 + images2\n",
        "        labels = labels1 + labels2\n",
        "        features = self.extract_wavelet_features(images)\n",
        "\n",
        "        # Apply UMAP dimensionality reduction\n",
        "        embeddings = self.reducer.fit_transform(features)\n",
        "        X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.2, random_state=42)\n",
        "\n",
        "        # Train the classifier\n",
        "        self.classifier.fit(X_train, y_train)\n",
        "\n",
        "        acc = self.classifier.score(X_test, y_test)\n",
        "        y_pred = self.classifier.predict(X_test)\n",
        "        print(f\"Classifier accuracy = {acc}\")\n",
        "\n",
        "        f1 = f1_score(y_test, y_pred)\n",
        "        print(f\"Classifier F1 = {f1}\")\n",
        "        print(classification_report(y_test, y_pred))\n",
        "\n",
        "    def predict(self, images):\n",
        "        # Extract features, project with the fitted reducer, then predict\n",
        "        features = self.extract_wavelet_features(images)\n",
        "        embeddings = self.reducer.transform(features)\n",
        "        return self.classifier.predict(embeddings)\n",
        "\n",
        "    def predict_proba(self, images):\n",
        "        # Extract features, project with the fitted reducer, then predict\n",
        "        features = self.extract_wavelet_features(images)\n",
        "        embeddings = self.reducer.transform(features)\n",
        "        return self.classifier.predict_proba(embeddings)\n",
        "\n",
        "    def score(self, test_folder):\n",
        "        # Load images, extract features, project, and evaluate the classifier\n",
        "        images, labels = self.load_images_from_folder(test_folder)\n",
        "        features = self.extract_wavelet_features(images)\n",
        "        embeddings = self.reducer.transform(features)\n",
        "        return self.classifier.score(embeddings, labels)\n",
        "\n",
        "    def save_model(self, filename):\n",
        "        joblib.dump(self, filename, compress=('zlib', 9))\n",
        "\n",
        "    @staticmethod\n",
        "    def load_model(filename):\n",
        "        return joblib.load(filename)"
      ],
      "metadata": {
        "id": "V8NO_N4QteQK"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "classifier = FluxClassifier()\n",
        "classifier.fit(\"AI\", \"Photo\")"
      ],
      "metadata": {
        "id": "sFYjKz1L6xgg"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "classifier.save_model(\"flux_classifier.pkl\")"
      ],
      "metadata": {
        "id": "tiLVrOTF_ZGM"
      },
      "execution_count": null,
      "outputs": []
    },
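    {
      "cell_type": "markdown",
      "source": [
        "*Editor's note:* a quick round-trip check of the persistence helpers. The cell below is an editorial sketch: it reloads the pickled pipeline and scores it per folder, assuming the `Validation/AI` and `Validation/Photo` folders unzipped earlier are present."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Editor's sketch: reload the saved pipeline and score it on each\n",
        "# validation folder (paths assumed from the cells above).\n",
        "clf_reloaded = FluxClassifier.load_model(\"flux_classifier.pkl\")\n",
        "print(\"AI folder accuracy:\", clf_reloaded.score(\"Validation/AI\"))\n",
        "print(\"Photo folder accuracy:\", clf_reloaded.score(\"Validation/Photo\"))"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },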
    {
      "cell_type": "code",
      "source": [
        "# prompt: save the model to my google drive.\n",
        "\n",
        "from google.colab import drive\n",
        "drive.mount('/content/drive')\n",
        "!cp flux_classifier.pkl /content/drive/MyDrive"
      ],
      "metadata": {
        "id": "sXo1mHFSADuS"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "images = [Image.open(\"pDGQUK1BYaJYhrFB5ouQU.jpeg\"), Image.open(\"jenta2.jpeg\")]\n",
        "predictions = classifier.predict_proba(images)\n",
        "print(predictions)"
      ],
      "metadata": {
        "id": "cNVwQ7Oq6vWa"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "98TbK3uH-_CD"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}