File size: 2,207 Bytes
11ac7be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from __future__ import annotations

import math
from dataclasses import dataclass
from random import Random
from typing import Dict, List, Optional

from .logistic import LogisticRegression
from .metrics import evaluate


@dataclass(frozen=True)
class TrainResult:
    """Immutable bundle returned by a training run.

    Attributes:
        model: The ``LogisticRegression`` instance produced by training.
        history: One metrics dict per recorded epoch, in the order the
            epochs were run.
    """

    # The fitted model object. The dataclass is frozen, so this attribute
    # cannot be rebound on a result instance.
    model: LogisticRegression
    # Per-epoch metric records mapping metric name -> float value.
    history: List[Dict[str, float]]


def _l2_norm(v: List[float]) -> float:
    return math.sqrt(sum(x * x for x in v))


def train_logreg_sgd(
    *,
    seed: int,
    xs_train: List[List[float]],
    ys_train: List[int],
    xs_val: List[List[float]],
    ys_val: List[int],
    epochs: int,
    lr: float,
    l2: float = 0.0,
    grad_clip: Optional[float] = None,
    loss_eps: float = 1e-12,
) -> TrainResult:
    """Fit a ``LogisticRegression`` with per-sample stochastic gradient descent.

    Each epoch visits every training sample once in a freshly shuffled order,
    updates the model after each sample, and then records train/validation
    metrics. Training stops early once either recorded loss is not finite.

    Args:
        seed: Seed for the private ``Random`` instance that shuffles the
            sample order, so runs are reproducible.
        xs_train: Training feature rows; every row must have the same length
            as the first row.
        ys_train: Training labels, exactly one per row of ``xs_train``.
        xs_val: Validation feature rows, handed to ``evaluate`` as-is.
        ys_val: Validation labels, handed to ``evaluate`` as-is.
        epochs: Maximum number of passes over the training data. Values
            below 1 skip training and yield an empty history.
        lr: Step size applied to every gradient update.
        l2: Coefficient of the ``l2 * w`` term added to the weight gradient.
            The bias gradient carries no penalty term.
        grad_clip: When a positive number, the weight gradient is rescaled so
            its L2 norm does not exceed this value, and the bias gradient is
            multiplied by the same factor. ``None`` or a non-positive value
            disables clipping.
        loss_eps: Forwarded to ``evaluate`` as ``eps``.

    Returns:
        A ``TrainResult`` holding the trained model and one history entry per
        completed epoch with the keys ``epoch``, ``train_loss``,
        ``train_acc``, ``val_loss`` and ``val_acc``.

    Raises:
        ValueError: If ``xs_train`` and ``ys_train`` differ in length, or if
            any training row's length differs from the first row's.
    """
    # Validate before building anything. The update step below pairs features
    # with weights through ``zip``, which stops at the shorter input, so one
    # ragged row would silently shrink ``model.w`` for the rest of the run.
    n_samples = len(xs_train)
    if len(ys_train) != n_samples:
        raise ValueError(
            "xs_train and ys_train must have the same length "
            f"(got {n_samples} and {len(ys_train)})"
        )
    n_features = len(xs_train[0]) if xs_train else 0
    for row_no, row in enumerate(xs_train):
        if len(row) != n_features:
            raise ValueError(
                f"xs_train[{row_no}] has {len(row)} features, "
                f"expected {n_features}"
            )

    r = Random(seed)
    model = LogisticRegression.init(n_features)
    history: List[Dict[str, float]] = []

    # Loop-invariant: decide once whether clipping is active.
    clip_enabled = grad_clip is not None and grad_clip > 0

    idx = list(range(n_samples))
    for epoch in range(1, epochs + 1):
        # The same index list is reshuffled in place every epoch.
        r.shuffle(idx)

        for i in idx:
            x = xs_train[i]
            y = ys_train[i]
            p = model.predict_proba_one(x)

            # Prediction error; presumably the log-loss gradient w.r.t. the
            # logit if ``p`` is a sigmoid output -- verify in LogisticRegression.
            g = p - y
            grad_w = [g * xi + (l2 * wi) for xi, wi in zip(x, model.w)]
            grad_b = g

            if clip_enabled:
                # The norm is measured over the weight gradient only; the
                # bias gradient is scaled by the same factor.
                norm = _l2_norm(grad_w)
                if norm > grad_clip:
                    scale = grad_clip / (norm + 1e-12)
                    grad_w = [gw * scale for gw in grad_w]
                    grad_b *= scale

            model.w = [wi - lr * gw for wi, gw in zip(model.w, grad_w)]
            model.b = model.b - lr * grad_b

        train_loss, train_acc = evaluate(model, xs_train, ys_train, eps=loss_eps)
        val_loss, val_acc = evaluate(model, xs_val, ys_val, eps=loss_eps)
        history.append(
            {
                "epoch": float(epoch),
                "train_loss": float(train_loss),
                "train_acc": float(train_acc),
                "val_loss": float(val_loss),
                "val_acc": float(val_acc),
            }
        )

        # The diverged epoch is still recorded above before stopping.
        if not math.isfinite(train_loss) or not math.isfinite(val_loss):
            break

    return TrainResult(model=model, history=history)