# NOTE: scraped from a Hugging Face Space whose status banner read
# "Spaces: Runtime error" — the runtime errors are addressed below.
## imports
import math

import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.stats import norm
## Problem 1
data = [4, 5, 7, 8, 8, 9, 10, 5, 2, 3, 5, 4, 8, 9]
data_mean = np.mean(data)
data_variance = np.var(data)

# Initial guess for the Gaussian parameters, and the closed-form MLE targets.
mu = 0.5
sigma = 0.5
w = np.array([mu, sigma])
# NOTE(review): w_star's second entry is the *variance*, while the SGD below
# treats the second parameter as sigma — confirm this is intended.
w_star = np.array([data_mean, data_variance])
mu_star = data_mean
sigma_star = np.sqrt(data_variance)

# Four random start points, at two different distances around w_star.
offset = 10 * np.random.random(2)
w1p = w_star + 0.5 * offset
w1n = w_star - 0.5 * offset
w2p = w_star + 0.25 * offset
w2n = w_star - 0.25 * offset
# Negative Log Likelihood of a single sample:
# $-\ln\left(\frac{1}{\sqrt{2\pi\sigma^2}}\exp\left(-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2\right)\right)$.
# Ignoring the contribution of the constant, we find that
# $\frac{\partial}{\partial\mu}\mathrm{NLL} = \frac{\mu-x}{\sigma^2}$ and
# $\frac{\partial}{\partial\sigma}\mathrm{NLL} = \frac{\sigma^2-(x-\mu)^2}{\sigma^3}$.
# The step functions below apply the *negatives* of these gradients, which is
# why the SGD updates use `+=` and still descend the NLL.
def loss(mu, sigma, x):
    """Exact Gaussian negative log-likelihood of samples x under N(mu, sigma^2)."""
    return np.sum([-np.log(norm.pdf(xi, loc=mu, scale=sigma)) for xi in x])


def loss_2_electric_boogaloo(mu, sigma, x):
    """Closed-form Gaussian *log*-likelihood (opposite sign of `loss`); x must be array-like."""
    return -len(x) / 2 * np.log(2 * np.pi * sigma**2) - 1 / (2 * sigma**2) * np.sum(
        (x - mu) ** 2
    )


def dmu(mu, sigma, x):
    """Negative d(NLL)/d(mu) = sum(x_i - mu) / sigma^2, so `+=` steps descend."""
    return -np.sum([mu - xi for xi in x]) / (sigma**2)


def dsigma(mu, sigma, x):
    """Negative d(NLL)/d(sigma) = -n/sigma + sum((x_i - mu)^2) / sigma^3."""
    return -len(x) / sigma + np.sum([(mu - xi) ** 2 for xi in x]) / (sigma**3)
log = []


def SGD_problem1(mu, sigma, x, learning_rate=0.01, n_epochs=1000):
    """Fit a Gaussian (mu, sigma) to samples x by gradient steps on the NLL.

    dmu/dsigma return the *negative* NLL gradients, so `+=` descends.
    Per-epoch diagnostics are appended to the module-level `log`.
    Returns np.array([mu, sigma]).
    """
    global log
    log = []
    for epoch in range(n_epochs):
        mu += learning_rate * dmu(mu, sigma, x)
        sigma += learning_rate * dsigma(mu, sigma, x)
        log.append(
            {
                "Epoch": epoch,
                "Loss": loss(mu, sigma, x),
                # BUG FIX: the original referenced an undefined name
                # `loss_2_alternative` (NameError); the closed-form
                # likelihood defined above is `loss_2_electric_boogaloo`.
                "Loss 2 Alternative": loss_2_electric_boogaloo(mu, sigma, x),
                "New mu": mu,
                "New sigma": sigma,
            }
        )
    return np.array([mu, sigma])
def debug_SGD_1(wnn, data):
    """Run SGD_problem1 from start point wnn and report whether mu/sigma converge."""
    print("SGD Problem 1")
    print("wnn", SGD_problem1(*wnn, data))
    dflog = pd.DataFrame(log)
    dflog["mu_star"] = mu_star
    dflog["mu_std"] = sigma_star

    first, last = dflog.iloc[0], dflog.iloc[-1]
    print(f"mu diff at start {first['New mu'] - first['mu_star']}")
    print(f"mu diff at end {last['New mu'] - last['mu_star']}")
    if np.abs(last["New mu"] - last["mu_star"]) < np.abs(first["New mu"] - first["mu_star"]):
        print("mu is improving")
    else:
        print("mu is not improving")

    print(f"sigma diff at start {first['New sigma'] - first['mu_std']}")
    print(f"sigma diff at end {last['New sigma'] - last['mu_std']}")
    if np.abs(last["New sigma"] - last["mu_std"]) < np.abs(first["New sigma"] - first["mu_std"]):
        print("sigma is improving")
    else:
        print("sigma is not improving")
    return dflog
# _ = debug_SGD_1(w1p, data)
# _ = debug_SGD_1(w1n, data)
# _ = debug_SGD_1(w2p, data)
# _ = debug_SGD_1(w2n, data)
# Why `+=` works in SGD_problem1: dmu and dsigma return the NEGATIVE
# gradients of the negative log-likelihood, so adding them still moves the
# parameters downhill on the NLL — it is ordinary gradient descent.
## Problem 2
x = np.array([8, 16, 22, 33, 50, 51])
y = np.array([5, 20, 14, 32, 42, 58])


# $-\frac{n}{\sigma}+\frac{1}{\sigma^3}\sum_{i=1}^n(y_i - (mx+c))^2$
# NOTE(review): these gradients never read `y`; each residual is formed as
# `sample - (m*x + c)` against the *whole* x array, so the sums run over an
# n-by-n grid of terms. That does not match the formulas above — confirm.
def dsigma(sigma, c, m, x):
    grid = [(sample - (m * x + c)) ** 2 for sample in x]
    return -len(x) / sigma + np.sum(grid) / (sigma**3)


# $-\frac{1}{\sigma^2}\sum_{i=1}^n(y_i - (mx+c))$
def dc(sigma, c, m, x):
    grid = [sample - (m * x + c) for sample in x]
    return -np.sum(grid) / (sigma**2)


# $-\frac{1}{\sigma^2}\sum_{i=1}^n(x_i(y_i - (mx+c)))$
def dm(sigma, c, m, x):
    grid = [x * (sample - (m * x + c)) for sample in x]
    return -np.sum(grid) / (sigma**2)
| log2 = [] | |
| def SGD_problem2( | |
| sigma: float, | |
| c: float, | |
| m: float, | |
| x: np.array, | |
| y: np.array, | |
| learning_rate=0.01, | |
| n_epochs=1000, | |
| ): | |
| global log2 | |
| log2 = [] | |
| for epoch in range(n_epochs): | |
| sigma += learning_rate * dsigma(sigma, c, m, x) | |
| c += learning_rate * dc(sigma, c, m, x) | |
| m += learning_rate * dm(sigma, c, m, x) | |
| log2.append( | |
| { | |
| "Epoch": epoch, | |
| "New sigma": sigma, | |
| "New c": c, | |
| "New m": m, | |
| "dc": dc(sigma, c, m, x), | |
| "dm": dm(sigma, c, m, x), | |
| "dsigma": dsigma(sigma, c, m, x), | |
| "Loss": loss((m * x + c), sigma, y), | |
| } | |
| ) | |
| print(f"Epoch {epoch}, Loss: {loss((m * x + c), sigma, y)}") | |
| return np.array([sigma, c, m]) | |
# def debug_SGD_2(wnn, data):
#     print("SGD Problem 2")
#     print("wnn", SGD_problem2(*wnn, data))
#     dflog = pd.DataFrame(log)
#     dflog["m_star"] = m_star
#     dflog["c_star"] = c_star
#     dflog["sigma_star"] = sigma_star
#     print(f"m diff at start {dflog.iloc[0]['New m'] - dflog.iloc[0]['m_star']}")
#     print(f"m diff at end {dflog.iloc[-1]['New m'] - dflog.iloc[-1]['m_star']}")
#     if np.abs(dflog.iloc[-1]["New m"] - dflog.iloc[-1]["m_star"]) < np.abs(
#         dflog.iloc[0]["New m"] - dflog.iloc[0]["m_star"]
#     ):
#         print("m is improving")
#     else:
#         print("m is not improving")
#     print(f"c diff at start {dflog.iloc[0]['New c'] - dflog.iloc[0]['c_star']}")
#     print(f"c diff at end {dflog.iloc[-1]['New c'] - dflog.iloc[-1]['c_star']}")
#     if np.abs(dflog.iloc[-1]["New c"] - dflog.iloc[-1]["c_star"]) < np.abs(
#         dflog.iloc[0]["New c"] - dflog.iloc[0]["c_star"]
#     ):
#         print("c is improving")
#     else:
#         print("c is not improving")
#     print(f"sigma diff at start {dflog.iloc[0]['New sigma'] - dflog.iloc[0]['sigma_star']}")
#     print(f"sigma diff at end {dflog.iloc[-1]['New sigma'] - dflog.iloc[-1]['sigma_star']}")
#     if np.abs(dflog.iloc[-1]["New sigma"] - dflog.iloc[-1]["sigma_star"]) < np.abs(
#         dflog.iloc[0]["New sigma"] - dflog.iloc[0]["sigma_star"]
#     ):
#         print("sigma is improving")
#     else:
#         print("sigma is not improving")
#     return dflog
# Fit problem 2 from a (sigma, c, m) = (0.5, 0.5, 0.5) start.
result = SGD_problem2(0.5, 0.5, 0.5, x, y)
print(f"final parameters: m={result[2]}, c={result[1]}, sigma={result[0]}")
## pset2
# Knowing that the Poisson pmf is $P(k) = \frac{\lambda^{k} e^{-\lambda}}{k!}$, we can find the negative log likelihood of the data as $-\log(\Pi_{i=1}^n P(k_i)) = -\sum_{i=1}^n \log(\frac{\lambda^{k_i} e^{-\lambda}}{k_i!}) = \sum_{i=1}^n (-k_i \ln(\lambda) + \ln(k_i!) + \lambda)$. Simplified, this gives $n\lambda + \sum_{i=1}^n \ln(k_i!) - \sum_{i=1}^n k_i \ln(\lambda)$. Differentiating with respect to $\lambda$ gives $n - \sum_{i=1}^n \frac{k_i}{\lambda}$, which is our desired $\frac{\partial L}{\partial \lambda}$!
# pandas is already imported at the top of the file; this re-import is a no-op.
import pandas as pd

df = pd.read_csv("../data/01_raw/nyc_bb_bicyclist_counts.csv")
| dlambda = lambda l, k: len(k) - np.sum([ki / l for ki in k]) | |
def SGD_problem3(
    l: float,
    k: np.array,
    learning_rate=0.01,
    n_epochs=1000,
):
    """Fit a Poisson rate `l` (lambda) to counts k by NLL gradient descent.

    Per-epoch diagnostics go to the module-level `log3`.
    Returns a length-1 array holding the fitted rate.
    """
    global log3
    log3 = []
    for epoch in range(n_epochs):
        l -= learning_rate * dlambda(l, k)
        # NLL up to a constant: $n\lambda - \sum_i k_i \ln(\lambda)$.
        # The $\sum_i \ln(k_i!)$ term is constant in lambda and caused
        # overflows, so it is deliberately omitted.
        loss = len(k) * l - np.sum([ki * np.log(l) for ki in k])
        log3.append(
            {
                "Epoch": epoch,
                "New lambda": l,
                "dlambda": dlambda(l, k),
                "Loss": loss,
            }
        )
        print(f"Epoch {epoch}", f"Loss: {loss}")
    return np.array([l])
# The Poisson MLE for the rate is the sample mean.
l_star = df["BB_COUNT"].mean()


def debug_SGD_3(data, l=1000):
    """Run SGD_problem3 from start rate `l` and report whether it converges toward l_star."""
    print("SGD Problem 3")
    print(f"l: {SGD_problem3(l, data)}")
    dflog = pd.DataFrame(log3)
    dflog["l_star"] = l_star

    first, last = dflog.iloc[0], dflog.iloc[-1]
    print(f"l diff at start {first['New lambda'] - first['l_star']}")
    print(f"l diff at end {last['New lambda'] - last['l_star']}")
    if np.abs(last["New lambda"] - last["l_star"]) < np.abs(first["New lambda"] - first["l_star"]):
        print("l is improving")
    else:
        print("l is not improving")
    return dflog


# Probe convergence from both sides of the MLE.
debug_SGD_3(data=df["BB_COUNT"].values, l=l_star + 1000)
debug_SGD_3(data=df["BB_COUNT"].values, l=l_star - 1000)
## pset 4
# dw = lambda w, x: len(x) * np.exp(np.dot(x, w)) * x - np.sum()


def primitive(xi, wi):
    """Per-coordinate gradient term for pset 4.

    NOTE(review): reads the module-level `x` (the problem-2 array) for the
    count factor `x.shape[0]`; presumably this should be the number of rows
    in the pset-4 design matrix — confirm.
    """
    return (x.shape[0] * np.exp(wi * xi) * xi) - (xi**2)


def p_dw(w, xs):
    """Stack `primitive` over paired (x_i, w_i).

    BUG FIX: the original comprehension read `for xi, wi in ]` — a
    SyntaxError with the iterable missing; `zip(xs, w)` is the only pairing
    consistent with the loop variables and the call sites.
    """
    return np.array([primitive(xi, wi) for xi, wi in zip(xs, w)])
def SGD_problem4(
    w: np.array,
    x: np.array,
    learning_rate=0.01,
    n_epochs=1000,
):
    """Gradient-step the weight vector w for the pset-4 model.

    NOTE: `w -= ...` mutates the caller's array in place, and the logged
    "New w" entries all reference that same array.
    Per-epoch diagnostics go to the module-level `log4`; returns w.
    """
    global log4
    log4 = []

    # Poisson-style NLL with constants dropped (the ln(k!) term overflowed).
    # Hoisted out of the loop: it is loop-invariant.
    def loss_fn(k, lam):
        return len(k) * lam - np.sum([ki * np.log(lam) for ki in k])

    for epoch in range(n_epochs):
        w -= learning_rate * p_dw(w, x)
        loss = loss_fn(x, np.exp(np.dot(x, w)))
        log4.append(
            {
                "Epoch": epoch,
                "New w": w,
                # BUG FIX: the original logged dw(w, x), but no `dw` exists
                # (only the commented-out sketch above); p_dw is the gradient.
                "dw": p_dw(w, x),
                "Loss": loss,
            }
        )
        print(f"Epoch {epoch}", f"Loss: {loss}")
    return w
# NOTE(review): this redefines `debug_SGD_3` from pset 3 above — the later
# definition wins at module level; it probably should be `debug_SGD_4`, but
# the name is kept so the existing call below still resolves.
def debug_SGD_3(data, w=None):
    """Run SGD_problem4 on `data` and return the per-epoch log as a DataFrame.

    BUG FIX: the default for w was a shared mutable np.array([1, 1]);
    SGD_problem4 updates w in place, so repeated default-argument calls
    would reuse the mutated array. A None sentinel gives a fresh default.
    """
    if w is None:
        w = np.array([1, 1])
    print("SGD Problem 4")
    print(f"w: {SGD_problem4(w, data)}")
    dflog = pd.DataFrame(log4)
    return dflog
# Fit the pset-4 weights on the weather features.
features = df[["HIGH_T", "LOW_T", "PRECIP"]].to_numpy()
_ = debug_SGD_3(data=features, w=np.array([1.0, 1.0, 1.0]))