#pragma once
#include "../core/tensor.hpp"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>
|
|
| namespace newnet { |
|
|
| |
| |
| class SGD { |
| public: |
| float lr; |
| |
| SGD(float learning_rate) : lr(learning_rate) {} |
| |
| |
| void step(std::vector<Tensor*> params) { |
| for (Tensor* p : params) { |
| assert(p->data.size() == p->grad.size()); |
| for (int i = 0; i < (int)p->data.size(); i++) { |
| p->data[i] -= lr * p->grad[i]; |
| } |
| } |
| } |
| |
| |
| |
| |
| void zero_grad(std::vector<Tensor*> params) { |
| for (Tensor* p : params) { |
| p->zero_grad(); |
| } |
| } |
| }; |
|
|
| |
| |
| class Adam { |
| public: |
| float lr; |
| float beta1; |
| float beta2; |
| float epsilon; |
| int t; |
| |
| |
| |
| std::vector<std::vector<float>> m; |
| std::vector<std::vector<float>> v; |
| bool initialized; |
| |
| Adam(float learning_rate = 0.001f, float b1 = 0.9f, float b2 = 0.999f, float eps = 1e-8f) |
| : lr(learning_rate), beta1(b1), beta2(b2), epsilon(eps), t(0), initialized(false) {} |
| |
| void step(std::vector<Tensor*> params) { |
| |
| if (!initialized) { |
| for (auto* p : params) { |
| m.push_back(std::vector<float>(p->data.size(), 0.0f)); |
| v.push_back(std::vector<float>(p->data.size(), 0.0f)); |
| } |
| initialized = true; |
| } |
| |
| t++; |
| |
| for (int idx = 0; idx < (int)params.size(); idx++) { |
| Tensor* p = params[idx]; |
| for (int i = 0; i < (int)p->data.size(); i++) { |
| |
| m[idx][i] = beta1 * m[idx][i] + (1.0f - beta1) * p->grad[i]; |
| |
| v[idx][i] = beta2 * v[idx][i] + (1.0f - beta2) * p->grad[i] * p->grad[i]; |
| |
| |
| float m_hat = m[idx][i] / (1.0f - std::pow(beta1, t)); |
| float v_hat = v[idx][i] / (1.0f - std::pow(beta2, t)); |
| |
| |
| p->data[i] -= lr * m_hat / (std::sqrt(v_hat) + epsilon); |
| } |
| } |
| } |
| |
| void zero_grad(std::vector<Tensor*> params) { |
| for (Tensor* p : params) { |
| p->zero_grad(); |
| } |
| } |
| }; |
|
|
| } |
|
|