notes

import marimo

__generated_with = "0.23.3"
app = marimo.App()


@app.cell
def _():
    """Import the notebook's dependencies and expose them to the other cells."""
    import marimo as mo
    import numpy as np

    # matplotlib is optional: when it is missing, `plt` is set to None so the
    # plotting cells can skip their figures instead of failing on import.
    try:
        import matplotlib.pyplot as plt
    except ModuleNotFoundError:
        plt = None

    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression

    # A cell renders only its last expression, so the warning is built as a
    # value and left as the final expression (None renders nothing).
    _missing_matplotlib = (
        mo.callout(
            mo.md("Matplotlib is not installed. Plots will be skipped or limited."),
            kind="warn",
        )
        if plt is None
        else None
    )
    _missing_matplotlib
    return LinearRegression, make_regression, mo, np, plt


@app.cell
def _(mo):
    """Render the notebook title and introduction."""
    # One markdown block: separate bare mo.md(...) statements would be
    # discarded, because only the last expression of a cell is displayed.
    mo.md(
        """
        # Linear Regression: A Beginner's Guide

        Linear regression is a fundamental technique in machine learning used to model the relationship between a dependent variable and one or more independent variables.

        In this notebook, we'll explore what linear regression is, how it works mathematically, how to fit a model using scikit-learn, and how to implement it from scratch using gradient descent.
        """
    )
    return


@app.cell
def _(LinearRegression, make_regression, mo, np, plt):
    """Generate the sample data, fit scikit-learn's model and plot the fit."""
    # Create sample data
    X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)

    # Fit sklearn linear regression
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)

    # Calculate predictions
    y_pred = lin_reg.predict(X)

    # Names starting with "_" are local to this cell; the plotting scratch
    # variables must be local because another cell builds a similar figure.
    _items = [
        mo.md(
            """
            ## What is Linear Regression?

            Linear regression aims to find the 'best fit' line through data points. This line is defined by weights (coefficients) and a bias term.
            """
        )
    ]

    if plt is not None:
        # Sort by X so the fitted line is drawn left to right
        _order = np.argsort(X[:, 0])

        _fig, _ax = plt.subplots(figsize=(10, 6))
        _ax.scatter(X, y, alpha=0.6, label="Data points")
        _ax.plot(X[_order], y_pred[_order], color="red", linewidth=2, label="Fitted line")
        _ax.set_xlabel("X")
        _ax.set_ylabel("y")
        _ax.legend()
        _ax.grid(True, alpha=0.3)
        _ax.set_title("Linear Regression Example")
        _items.append(_fig)
    else:
        _items.append(
            mo.callout(mo.md("Skipping plot: matplotlib is not available."), kind="danger")
        )

    mo.vstack(_items)
    return X, lin_reg, y, y_pred


@app.cell
def _(lin_reg, mo):
    """Show the model equation together with the fitted weight and bias."""
    mo.md(
        f"""
        ## Mathematical Foundation

        The linear regression model can be expressed as:

        $$y = Xw + b$$

        Where:

        - $y$ is the target variable
        - $X$ is the matrix of input features
        - $w$ is the vector of weights
        - $b$ is the bias term

        In our example:

        - Weights (w): {lin_reg.coef_[0]:.2f}
        - Bias (b): {lin_reg.intercept_:.2f}
        """
    )
    return


@app.cell
def _(mo, np, y, y_pred):
    """Explain mean squared error and report it for the scikit-learn fit."""
    # Calculate MSE
    _mse = np.mean((y - y_pred) ** 2)

    mo.vstack(
        [
            # Raw, non-f string: the LaTeX contains backslashes and braces.
            mo.md(
                r"""
                ## Loss Function: Mean Squared Error (MSE)

                The goal of linear regression is to minimize the error between predicted and actual values. The most common loss function is Mean Squared Error:

                $$\text{MSE} = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$$

                Where:

                - $y_i$ is the actual value
                - $\hat{y}_i$ is the predicted value
                - $n$ is the number of samples
                """
            ),
            mo.md(f"In our example, the MSE is approximately: {_mse:.2f}"),
        ]
    )
    return


@app.cell
def _(X, mo, np, y):
    """Fit the same data with a hand-written gradient descent loop."""

    def gradient_descent(X, y, learning_rate=0.01, iterations=1000):
        """Fit ``y ~ w * x + b`` by batch gradient descent on the MSE.

        Args:
            X: Feature array; it is flattened with ``ravel()``, so it is
                expected to hold a single feature column.
            y: Target values, one per sample.
            learning_rate: Step size applied to each gradient.
            iterations: Number of full-batch update steps.

        Returns:
            A tuple ``(w, b)``: ``w`` is a length-1 array holding the slope
            and ``b`` is the intercept.
        """
        m = len(X)
        # Seeded start so that re-running the cell gives the same numbers.
        w = np.random.default_rng(0).standard_normal(1)
        b = 0.0

        # Flatten X so it lines up element-wise with y
        x_vec = X.ravel()
        for _ in range(iterations):
            residual = y - (x_vec * w + b)

            # Gradients of the MSE with respect to w and b
            dw = (-2 / m) * np.sum(x_vec * residual)
            db = (-2 / m) * np.sum(residual)

            w -= learning_rate * dw
            b -= learning_rate * db

        return w, b

    # Kept in variables so the text below always matches what was actually run.
    _learning_rate = 0.01
    _iterations = 1000

    _w_gd, _b_gd = gradient_descent(
        X, y, learning_rate=_learning_rate, iterations=_iterations
    )
    y_pred_gd = X.ravel() * _w_gd + _b_gd

    _mse_gd = np.mean((y - y_pred_gd) ** 2)

    mo.md(
        f"""
        ## How Does It Work? Gradient Descent

        To minimize the MSE, we can use an optimization algorithm called gradient descent. This algorithm iteratively adjusts the weights and bias to reduce the loss function.

        Using gradient descent (learning rate={_learning_rate}, {_iterations} iterations):

        - Weights (w): {_w_gd[0]:.2f}
        - Bias (b): {_b_gd:.2f}
        - MSE: {_mse_gd:.2f}
        """
    )
    return (y_pred_gd,)


@app.cell
def _(X, mo, np, plt, y, y_pred, y_pred_gd):
    """Plot the scikit-learn fit and the gradient descent fit on the same axes."""
    _items = [mo.md("## Visualizing the Process")]

    if plt is not None:
        _fig, _ax = plt.subplots(figsize=(10, 6))
        _ax.scatter(X, y, alpha=0.6, label="Data points")

        # For clean lines, sort by X for both predictions
        _order = np.argsort(X[:, 0])
        _x_sorted = X[_order]

        _ax.plot(_x_sorted, y_pred[_order], color="red", linewidth=2, label="Sklearn fit")
        _ax.plot(
            _x_sorted,
            y_pred_gd[_order],
            color="green",
            linewidth=2,
            linestyle="--",
            label="Gradient descent",
        )
        _ax.set_xlabel("X")
        _ax.set_ylabel("y")
        _ax.legend()
        _ax.grid(True, alpha=0.3)
        _ax.set_title("Comparison: Sklearn vs Gradient Descent")
        _items.append(_fig)
    else:
        _items.append(
            mo.callout(mo.md("Skipping plot: matplotlib is not available."), kind="danger")
        )

    mo.vstack(_items)
    return


@app.cell
def _(mo):
    """Render the closing summary."""
    mo.md(
        """
        ## Key Takeaways

        1. Linear regression models the relationship between variables using a linear equation.
        2. The parameters (weights and bias) are learned by minimizing the loss function.
        3. Mean Squared Error (MSE) is a common loss function.
        4. Gradient descent is commonly used to optimize parameters.
        5. Even simple models like linear regression can be very effective!

        ---

        **Try modifying the data, learning rate, or number of iterations above to see how the model changes!**
        """
    )
    return


if __name__ == "__main__":
    app.run()