Spaces:

Msk7000
/

Image_Clf_App_Implementation_Comparison

Running

File size: 6,027 Bytes

a60082f

"""
2015 実装（参照用）— Theano + NumPy による手書き CNN
現在の Theano は Python 3.12 以降では動作しないため、
このファイルはアーキテクチャ記録・比較用途のドキュメントとして保存する。
実際の推論は model_2025.py で行う。
"""

# ---------------------------------------------------------------------------
# ※ 以下のコードは Python 3.8 / Theano 1.0 環境での動作を前提とする
# ---------------------------------------------------------------------------

import numpy as np

try:
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv2d
    from theano.tensor.signal import pool
    THEANO_AVAILABLE = True
except ImportError:
    THEANO_AVAILABLE = False

from PIL import Image
import pickle

# Hyperparameters used by the SGD update rule and the training loop.
LEARNING_RATE = 0.01
N_EPOCHS = 200
BATCH_SIZE = 50

# Class names; a predicted class index is used directly as an index here.
LABELS = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]

# Width of the softmax output layer: one unit per label.
N_CLASSES = len(LABELS)

# Per-channel (R, G, B) mean and standard deviation applied in preprocess().
MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)


# ── ユーティリティ ──────────────────────────────────────────────────────────

def _shared_w(shape, name):
    """Return a Theano shared variable of randomly initialised weights.

    The initial values are drawn from a normal distribution with mean 0 and
    standard deviation 0.01, have the given ``shape`` and are stored as
    float32. ``name`` is attached to the shared variable.
    """
    initial = np.random.normal(0, 0.01, shape)
    initial = initial.astype(np.float32)
    return theano.shared(initial, name=name)

def _shared_b(n, name):
    """Return a Theano shared variable holding ``n`` float32 zeros (a bias)."""
    zeros = np.zeros(n, dtype=np.float32)
    return theano.shared(zeros, name=name)


# ── モデル構築 ──────────────────────────────────────────────────────────────

def build_model():
    """Build the CNN graph and compile its Theano training/prediction functions.

    Layers, in order:
        1. 5x5 convolution, 3 -> 32 channels, 2x2 pooling, tanh
        2. 5x5 convolution, 32 -> 64 channels, 2x2 pooling, tanh
        3. fully connected, 64 * 5 * 5 -> 512, tanh
        4. fully connected, 512 -> N_CLASSES, softmax

    Returns:
        A tuple ``(params, train_fn, pred_fn)`` where

        * ``params`` is the list of shared variables
          ``[W0, b0, W1, b1, W2, b2, W3, b3]``,
        * ``train_fn(x, y)`` returns ``[loss, err]`` (mean negative
          log-likelihood and mean misclassification rate of the batch) and
          applies one SGD step with ``LEARNING_RATE`` to every parameter,
        * ``pred_fn(x)`` returns the arg-max class index for each sample.

    Raises:
        RuntimeError: if Theano could not be imported.
    """
    if not THEANO_AVAILABLE:
        raise RuntimeError(
            "Theano がインストールされていません。"
            "Python 3.8 + Theano 1.0 の環境が必要です。"
        )

    # Weight parameters
    W0 = _shared_w((32, 3, 5, 5), "W0")    # Conv 1
    b0 = _shared_b(32, "b0")
    W1 = _shared_w((64, 32, 5, 5), "W1")   # Conv 2
    b1 = _shared_b(64, "b1")
    W2 = _shared_w((64 * 5 * 5, 512), "W2") # FC 1
    b2 = _shared_b(512, "b2")
    W3 = _shared_w((512, N_CLASSES), "W3")  # Output layer
    b3 = _shared_b(N_CLASSES, "b3")

    params = [W0, b0, W1, b1, W2, b2, W3, b3]

    # Symbolic inputs: a 4-D image batch and an int32 label vector
    x = T.tensor4("x")
    y = T.ivector("y")

    # Forward pass (hand-written)
    conv0 = T.tanh(
        pool.pool_2d(
            conv2d(x, W0,
                   # The batch dimension is left unknown (None): the same
                   # graph serves training batches and the single-image
                   # batches produced by preprocess() for pred_fn.
                   input_shape=(None, 3, 32, 32),
                   filter_shape=(32, 3, 5, 5))
            + b0.dimshuffle("x", 0, "x", "x"),  # broadcast bias over N, H, W
            ws=(2, 2), ignore_border=True,
        )
    )
    conv1 = T.tanh(
        pool.pool_2d(
            conv2d(conv0, W1, filter_shape=(64, 32, 5, 5))
            + b1.dimshuffle("x", 0, "x", "x"),
            ws=(2, 2), ignore_border=True,
        )
    )
    flat = conv1.flatten(2)  # collapse everything but the batch axis
    fc   = T.tanh(T.dot(flat, W2) + b2)
    out  = T.nnet.softmax(T.dot(fc, W3) + b3)

    # Loss, gradients and the SGD update rule
    loss    = -T.mean(T.log(out)[T.arange(y.shape[0]), y])
    pred    =  T.argmax(out, axis=1)
    err     =  T.mean(T.neq(pred, y))
    grads   =  T.grad(loss, params)
    updates = [(p, p - LEARNING_RATE * g) for p, g in zip(params, grads)]

    # Compile the Theano functions (graph optimisation runs here, so this
    # can take tens of seconds)
    train_fn = theano.function([x, y], [loss, err], updates=updates)
    pred_fn  = theano.function([x], pred)

    return params, train_fn, pred_fn


# ── 前処理 ─────────────────────────────────────────────────────────────────

def preprocess(image) -> np.ndarray:
    """Convert an image into a normalised ``(1, 3, 32, 32)`` float32 array.

    Args:
        image: a ``PIL.Image`` instance, or a filesystem path to an image
            file given as ``str`` or any ``os.PathLike`` (for example
            ``pathlib.Path``).

    Returns:
        The image converted to RGB, resized to 32x32, scaled to [0, 1],
        normalised per channel with ``MEAN`` / ``STD``, transposed to
        channel-first order and given a leading batch axis of size 1.
    """
    import os  # local import: only needed for the path-type check below

    if isinstance(image, (str, os.PathLike)):
        # Open inside a context manager so the file handle is released as
        # soon as the pixel data has been converted into a new image.
        with Image.open(image) as opened:
            rgb = opened.convert("RGB")
    else:
        rgb = image.convert("RGB")

    img = rgb.resize((32, 32))
    arr = np.array(img, dtype=np.float32) / 255.0
    arr = (arr - MEAN) / STD     # per-channel normalisation
    arr = arr.transpose(2, 0, 1) # HWC -> CHW
    return arr[np.newaxis]       # add the batch axis


# ── 学習ループ ──────────────────────────────────────────────────────────────

def fit(train_fn, X_train: np.ndarray, y_train: np.ndarray) -> None:
    """Train with mini-batch SGD for ``N_EPOCHS`` epochs, printing progress.

    Each epoch reshuffles the sample order and feeds consecutive slices of
    ``BATCH_SIZE`` indices to ``train_fn``. Samples left over after the last
    full batch are skipped for that epoch.

    Args:
        train_fn: callable taking ``(X_batch, y_batch)`` and returning
            ``(loss, err)`` for that batch.
        X_train: training inputs, indexable by an integer index array.
        y_train: training labels, one per entry of ``X_train``.

    Raises:
        ValueError: if ``X_train`` and ``y_train`` differ in length, or if
            there are fewer than ``BATCH_SIZE`` samples (no batch could be
            formed and every epoch would report ``nan``).
    """
    n = len(X_train)
    if len(y_train) != n:
        raise ValueError(
            f"X_train and y_train differ in length: {n} != {len(y_train)}"
        )

    n_batches = n // BATCH_SIZE
    if n_batches == 0:
        raise ValueError(
            f"need at least BATCH_SIZE={BATCH_SIZE} samples, got {n}"
        )

    for epoch in range(N_EPOCHS):
        order = np.random.permutation(n)
        losses, errs = [], []
        for i in range(n_batches):
            batch = order[i * BATCH_SIZE : (i + 1) * BATCH_SIZE]
            loss, err = train_fn(X_train[batch], y_train[batch])
            losses.append(loss)
            errs.append(err)
        print(
            f"Epoch {epoch + 1:3d} / {N_EPOCHS}  "
            f"loss={np.mean(losses):.4f}  "
            f"err={np.mean(errs):.4f}"
        )


# ── 推論 ────────────────────────────────────────────────────────────────────

def classify(pred_fn, image) -> str:
    """Return the label predicted for a single image.

    ``image`` is passed through ``preprocess``; ``pred_fn`` is then called on
    the resulting batch and its first prediction is looked up in ``LABELS``.
    """
    batch = preprocess(image)
    predictions = pred_fn(batch)
    return LABELS[predictions[0]]


# ── モデルの保存 / 読み込み ──────────────────────────────────────────────────

def save_model(params, path: str) -> None:
    """Pickle the current value of every parameter to ``path``.

    The values returned by each parameter's ``get_value()`` are collected
    into a list, in order, and written with pickle protocol 2.
    """
    snapshot = []
    for param in params:
        snapshot.append(param.get_value())

    with open(path, "wb") as handle:
        pickle.dump(snapshot, handle, protocol=2)


def load_model(params, path: str) -> None:
    """Restore parameter values from a file written by ``save_model``.

    Args:
        params: parameters to update in place; each must provide
            ``set_value()``. Order must match the order used when saving.
        path: path of the pickle file to read.

    Raises:
        ValueError: if the number of stored arrays differs from the number
            of parameters. No parameter is modified in that case.
    """
    # SECURITY: pickle.load can execute arbitrary code while unpickling.
    # Only load weight files that come from a trusted source.
    with open(path, "rb") as f:
        weights = pickle.load(f)

    params = list(params)
    # zip() would silently stop at the shorter sequence and leave the model
    # partially loaded, so check the count before touching any parameter.
    if len(weights) != len(params):
        raise ValueError(
            f"checkpoint holds {len(weights)} arrays "
            f"but the model has {len(params)} parameters"
        )

    for p, w in zip(params, weights):
        p.set_value(w)