Spaces:

aliakyurek
/

LinearRegression1_OLS

Runtime error

Ali Gunhan Akyurek

remove bad arg

93bb2e3 over 3 years ago

6.19 kB

	import pandas as pd
	import numpy as np
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import gradio as gr

	def load_dataset(n=100, w=0.4, b=5., x_range=(0, 50)):
	    """Generate a reproducible synthetic dataset for the regression demo.

	    ``x`` is drawn uniformly from ``x_range``. ``y`` follows
	    ``w * x * (1 + sin(x) / 5) + b`` plus Gaussian noise whose standard
	    deviation grows with ``x``; it is then standardized to zero mean and
	    unit (population) standard deviation.

	    Args:
	        n: Number of samples to draw.
	        w: Multiplier of the noise-free signal.
	        b: Offset of the noise-free signal (before standardization).
	        x_range: ``(low, high)`` bounds of the uniform ``x`` distribution.
	            Any two-element sequence is accepted.

	    Returns:
	        A ``pandas.DataFrame`` with columns ``"x"`` and ``"y"``, sorted by
	        ascending ``x``.
	    """
	    low, high = x_range[0], x_range[1]
	    # A private generator seeded with 42 produces the same stream as
	    # ``np.random.seed(42)`` but leaves NumPy's global RNG untouched.
	    rng = np.random.RandomState(42)

	    def noise_scale(values):
	        # Noise std rises from 1.25 at ``low`` to 6.25 at ``high``.
	        frac = (values - low) / (high - low)
	        return 5 * (0.25 + frac**2.)

	    x = (high - low) * rng.rand(n) + low
	    eps = rng.randn(n) * noise_scale(x)
	    y = (w * x * (1. + np.sin(x) / 5) + b) + eps
	    y = (y - y.mean()) / y.std()
	    order = np.argsort(x)
	    return pd.DataFrame({"x": x[order], "y": y[order]})

	def check_sanitize_data(inp):
	    """Validate the x-y table and convert it to NumPy arrays.

	    Args:
	        inp: Object supporting ``astype(float)`` and ``["x"]`` / ``["y"]``
	            column access, e.g. a ``pandas.DataFrame``.

	    Returns:
	        A ``(data, status)`` pair. ``data`` is ``(x, y)`` as float arrays, or
	        ``None`` when validation fails. ``status`` is a list holding one
	        ``(message, label)`` tuple whose label is ``"OK"`` or ``"Error"``.
	    """
	    try:
	        numeric = inp.astype(float)
	    except (AttributeError, TypeError, ValueError):
	        # Only conversion failures count as bad input; a bare ``except``
	        # would also swallow KeyboardInterrupt and SystemExit.
	        return None, [("Data points not numeric", "Error")]
	    x, y = numeric["x"].to_numpy(), numeric["y"].to_numpy()
	    # A line fit needs at least two points.
	    if len(x) < 2:
	        return None, [("Data points not provided", "Error")]
	    return (x, y), [("", "OK")]

	def plot_data(inp, m=None, b=None):
	    """Scatter-plot the data and, when a fit is given, overlay the line.

	    Args:
	        inp: Table with ``"x"`` and ``"y"`` columns; it is validated through
	            ``check_sanitize_data`` first.
	        m: Slope of the fitted line, or ``None`` to plot the data only.
	        b: Intercept of the fitted line, or ``None`` to plot the data only.

	    Returns:
	        A ``(figure, status)`` pair. ``figure`` is the Matplotlib figure, or
	        ``None`` when validation fails, in which case ``status`` is the
	        error status produced by ``check_sanitize_data``.
	    """
	    xy, status = check_sanitize_data(inp)
	    if xy is None:
	        return None, status
	    x, y = xy
	    fig, ax = plt.subplots()
	    ax.set(aspect=np.std(x).item() / 3, ylabel="Y-axis")
	    ax.plot(x, y, "o", label="Original data", markersize=2)
	    # Compare against None: a truthiness test would skip a valid fit whose
	    # slope is exactly zero.
	    if m is not None and b is not None:
	        y_hat = m * x + b
	        rss = np.sum((y - y_hat) ** 2)
	        # The x-axis label doubles as the RSS read-out, shown in red.
	        ax.set(xlabel=f"RSS:{rss:.4f}")
	        ax.xaxis.label.set(color="red")
	        ax.plot(x, y_hat, "r", label="Fitted line")
	    ax.legend()
	    ax.grid()
	    fig.tight_layout()
	    return fig, [("Data check", "OK")]

	def linear_regression_from_scratch(X, y):
	    """Fit a line by solving the OLS normal equations ``(XᵀX) β = Xᵀy``.

	    Args:
	        X: Design matrix with two columns (the caller in this file passes
	            the ``x`` values and a column of ones).
	        y: Target values, one per row of ``X``.

	    Returns:
	        ``(m, b)``: the coefficients for the first and second column of ``X``.

	    Raises:
	        numpy.linalg.LinAlgError: If ``XᵀX`` is singular.
	    """
	    XT_X = X.T @ X
	    XT_y = X.T @ y
	    # Solving the linear system yields the same β as inv(XᵀX) @ Xᵀy but
	    # avoids forming the explicit inverse, which is slower and less accurate.
	    m, b = np.linalg.solve(XT_X, XT_y)
	    return m, b

	def linear_regression_linalg_lstsq(X, y):
	    """Fit a line with NumPy's built-in least-squares solver.

	    Args:
	        X: Design matrix with two columns.
	        y: Target values, one per row of ``X``.

	    Returns:
	        ``(m, b)``: the coefficients for the first and second column of ``X``.
	    """
	    # lstsq returns (solution, residuals, rank, singular values); only the
	    # solution vector is needed here.
	    solution = np.linalg.lstsq(X, y, rcond=None)
	    slope, intercept = solution[0]
	    return slope, intercept

	def linear_regression_plot(method, inp):
	    """Fit a line with the selected implementation and plot the result.

	    Args:
	        method: ``"numpy from scratch"`` or ``"numpy.linalg.lstsq"``; any
	            other value (including ``None``) is reported as an error.
	        inp: Table with ``"x"`` and ``"y"`` columns.

	    Returns:
	        A ``(figure, status)`` pair. ``figure`` is ``None`` whenever the
	        data is invalid, no method is selected, or the fit fails; ``status``
	        is a list with one ``(message, label)`` tuple.
	    """
	    xy, status = check_sanitize_data(inp)
	    if xy is None:
	        return None, status
	    x, y = xy
	    solvers = {
	        "numpy from scratch": linear_regression_from_scratch,
	        "numpy.linalg.lstsq": linear_regression_linalg_lstsq,
	    }
	    solver = solvers.get(method)
	    if solver is None:
	        return None, [("Method not selected", "Error")]
	    # Design matrix [x, 1] so the solution is (slope, intercept).
	    X = np.column_stack((x, np.ones(len(x))))
	    try:
	        m, b = solver(X, y)
	    except np.linalg.LinAlgError:
	        # E.g. all x values identical: the normal equations are singular.
	        # Report it through the status output instead of crashing.
	        return None, [("Regression failed: singular data", "Error")]
	    fig, _ = plot_data(inp, m, b)
	    return fig, [("Regression", "OK")]

	# Example dataset shown in the table's "Examples" section.
	data = load_dataset()
	# Plot once at import time; the returned figure and status are discarded.
	plot_data(data)
	# Keyword arguments for gr.Blocks: page title plus custom CSS rules that
	# target the elem_id values used in the layout.
	block_params = dict(
	    title="Ordinary Least Squares",
	    css="""
	#XY {max-height: 350px; overflow-y: scroll}
	#images img {width:auto; height:auto}
	#images .flex {display:none; height:auto}
	#accord > div > span {font-weight: bold}
	""",
	)

	with gr.Blocks(**block_params) as demo:
	    # NOTE(review): the source this was recovered from had lost its
	    # indentation; the nesting below (two columns on top, accordions
	    # full-width underneath) is a reconstruction — confirm the layout.
	    with gr.Row():
	        # Left column: editable x-y table, example data and usage notes.
	        with gr.Column(scale=1):
	            data_frm = gr.Dataframe(
	                headers=data.columns.tolist(),
	                datatype=["number", "number"],
	                col_count=(2, "fixed"),
	                elem_id="XY",
	            )
	            plot_btn = gr.Button("Check&Plot")
	            gr.Examples([[data.values.tolist()]], inputs=data_frm)
	            gr.Markdown("""
	#### How to use?
	1.Fill the x-y table below (or use the example data provided)
	2.Check&Plot
	3.Select an implementation
	4.Regression&Plot
	""")
	        # Right column: status read-out, plot and regression controls.
	        with gr.Column(scale=1):
	            # color_map is passed to the constructor: the chained .style()
	            # call is deprecated in later Gradio 3.x releases and removed
	            # in Gradio 4, where it fails at startup.
	            status_hlt = gr.HighlightedText(
	                label="Status",
	                combine_adjacent=True,
	                color_map={"Error": "red", "OK": "green"},
	            )
	            data_plt = gr.Plot(label="Plot")
	            method_dd = gr.Dropdown(
	                label="Select an implementation",
	                choices=["numpy from scratch", "numpy.linalg.lstsq"],
	            )
	            regression_btn = gr.Button("Regression&Plot")

	    with gr.Accordion("Motivation", open=False, elem_id="accord"):
	        gr.Markdown("""
	In this space, I tried to get most out of Gradio an HF. So that this combination can be
	used not only for advanced ML models but also to demonstrate the topics regarding
	mathematical background of ML. The first topic is Linear Regression optimized with OLS
	""")
	    with gr.Accordion("Model Card", open=False, elem_id="accord"):
	        # Plain "|" column separators: "\|" is an invalid escape sequence
	        # in a normal string and keeps the table from rendering.
	        gr.Markdown("""
	| Name | Objective | Metric | Solution |
	| -------- | ------- | -------- | -------- |
	| Linear regression | Ord. least squares (OLS) | Residual sum-of-squares (RSS) | Analytical |
	""")
	    with gr.Accordion("Math Background", open=False, elem_id="accord"):
	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown("""
	We have a linear regression model in (1).
	We want to minimize RSS (2).
	We need the derivative of RSS(β) with respect to β to and set it zero.
	The resulting formula is given (3).
	An example matrix represenation of the model y = Xβ is given (4).
	""")
	            with gr.Column(scale=1):
	                img = gr.Image(label="Proof", value="img.png", elem_id="images")
	    with gr.Accordion("References", open=False, elem_id="accord"):
	        links = (
	            "statproofbook.github.io/P/mlr-ols",
	            "statproofbook.github.io/P/mlr-ols2",
	            "towardsdatascience.com/building-linear-regression-least-squares-with-linear-algebra-2adf071dd5dd",
	        )
	        gr.Markdown("\n".join(
	            f"{i}.[https://{link}](https://{link}) "
	            for i, link in enumerate(links, 1)
	        ))

	    # Both buttons write to the same plot and status components.
	    plot_btn.click(fn=plot_data, inputs=data_frm, outputs=[data_plt, status_hlt])
	    regression_btn.click(
	        fn=linear_regression_plot,
	        inputs=[method_dd, data_frm],
	        outputs=[data_plt, status_hlt],
	    )

	demo.launch()