Maching-Learning-Models / Linear_Regression.py

Upload 3 files

662d38c verified 4 months ago

6.23 kB

	# Simple linear regression (closed-form least-squares fit)
	import numpy as np

	# Sample points that lie exactly on the line y = 2x + 1
	x = np.array([1, 2, 3, 4, 5])
	y = np.array([3, 5, 7, 9, 11])

	# Slope w = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2);
	# intercept b is chosen so the line passes through (mean_x, mean_y).
	x_mean = x.mean()
	y_mean = y.mean()
	x_dev = x - x_mean
	y_dev = y - y_mean
	w = np.sum(x_dev * y_dev) / np.sum(x_dev**2)
	b = y_mean - w * x_mean

	# Fitted values for the training inputs
	y_pred = w * x + b

	print(f"Phương trình: y= {w: .1f}x + {b: .1f}")
	print(f"Dự đoán:", y_pred)


	# Plot the linear regression fit to visualize it
	# Simple linear regression + chart
	import numpy as np
	import matplotlib.pyplot as plt

	# Sample points that lie exactly on the line y = 2x + 1
	x = np.array([1, 2, 3, 4, 5])
	y = np.array([3, 5, 7, 9, 11])

	# Regression coefficients: slope from the centered cross-products,
	# intercept from the means.
	x_mean = x.mean()
	y_mean = y.mean()
	x_dev = x - x_mean
	y_dev = y - y_mean
	w = np.sum(x_dev * y_dev) / np.sum(x_dev ** 2)
	b = y_mean - w * x_mean

	# Fitted values for the training inputs
	y_pred = w * x + b

	# Report the fitted equation
	print(f"Phương trình hồi quy: y={w: .1f}x + {b: .1f}")

	# Chart: observed points as a scatter, fitted line on top
	plt.scatter(x, y, color='blue', label='Dữ liệu thật')
	plt.plot(x, y_pred, color='red', label='Đường hồi quy')
	plt.title('Hồi quy tuyến tính đơn giản + Trực quan hóa')
	plt.xlabel('x axis')
	plt.ylabel('y axis')
	plt.legend()
	plt.show()

	# Practice
	# 1. Linear data again, but with different coefficients
	import numpy as np
	# Sample points that lie exactly on the line y = 4x - 3
	x = np.array([1, 2, 3, 4, 5, 6, 7])
	y = np.array([1, 5, 9, 13, 17, 21, 25])  # y = 4x - 3
	# Closed-form least-squares estimates of slope w and intercept b
	x_mean = x.mean()
	y_mean = y.mean()
	x_dev = x - x_mean
	y_dev = y - y_mean
	w = np.sum(x_dev * y_dev) / np.sum(x_dev**2)
	b = y_mean - w * x_mean
	# Fitted values for the training inputs
	y_predict = w * x + b
	print(f"Phương trình y= {w: .0f}x + {b: .0f}")
	print(f"Dự đoán:", y_predict)
	# Chart: observed points as a scatter, fitted line on top
	import matplotlib.pyplot as plt
	plt.scatter(x, y, color='black', label='Real Data')
	plt.plot(x, y_predict, color='red', label='Predicted Data')
	plt.title('Linear Regression (4x - 3)')
	plt.xlabel('X - Axis')
	plt.ylabel('Y - Axis')
	plt.legend()
	plt.show()

	# 2. Linear data with noise (more realistic)
	# Formula: y = 3x + 2 + noise
	import numpy as np
	import matplotlib.pyplot as plt
	# Data for y = 3x + 2 + noise; the seed makes the noise reproducible
	np.random.seed(0)
	x = np.arange(1, 11)
	noise = np.random.randn(10) * 2  # random noise added to the exact line
	y = 3 * x + 2 + noise
	# Closed-form least-squares estimates of slope w and intercept b
	x_mean = x.mean()
	y_mean = y.mean()
	x_dev = x - x_mean
	y_dev = y - y_mean
	w = np.sum(x_dev * y_dev) / np.sum(x_dev**2)
	b = y_mean - w * x_mean
	# Fitted values for the training inputs
	y_predict = w * x + b
	print(f"Phương trình + nhiễu: y = {w: .1f}x + {b: .1f}")
	print(f"Dự đoán chính xác:", y_predict)
	# Chart: noisy observations as a scatter, fitted line on top
	plt.scatter(x, y, color='blue', label='Real Data + Noise')
	plt.plot(x, y_predict, color='red', label='Predicted Data + Noise')
	plt.title('Linear Regression AND Noise')
	plt.xlabel('X - Axis')
	plt.ylabel('Y - Axis')
	plt.legend()
	plt.show()

	# 3. Inverse linear relationship (negative slope)
	# Formula: y = -2x + 10
	import numpy as np
	import matplotlib.pyplot as plt
	# Sample points that lie exactly on the decreasing line y = -2x + 10
	x = np.array([1, 2, 3, 4, 5, 6])
	y = np.array([8, 6, 4, 2, 0, -2])
	# Closed-form least-squares estimates of slope w and intercept b
	x_mean = x.mean()
	y_mean = y.mean()
	x_dev = x - x_mean
	y_dev = y - y_mean
	w = np.sum(x_dev * y_dev) / np.sum(x_dev**2)
	b = y_mean - w * x_mean
	# Fitted values for the training inputs
	y_pred = w * x + b
	print(f"Phương trình tuyến tính nghịch: y = {w: .0f}x + {b: .0f}")
	print(f"Dự đoán chính xác:", y_pred)
	# Chart: observed points as a scatter, fitted decreasing line on top
	plt.scatter(x, y, color='red', label='Real Data')
	plt.plot(x, y_pred, color='blue', label='Predicted Data')
	plt.title('Linear Regression AND Reversed')
	plt.xlabel('X - Axis')
	plt.ylabel('Y - Axis')
	plt.legend()
	plt.show()

	# 4. Non-linear data (to see how poorly a straight-line fit performs)
	# Formula: y = x**2 + noise
	import numpy as np
	import matplotlib.pyplot as plt
	# Non-linear data; the seed makes the noise reproducible
	np.random.seed(1)
	# np.linspace(start, stop, num) returns `num` evenly spaced values
	x = np.linspace(-5, 5, 20)
	# np.random.randn(20) draws 20 samples from the standard normal (Gaussian)
	# distribution (mean 0, standard deviation 1). Multiplying by 3 amplifies
	# the noise to a standard deviation of about 3; the larger the factor, the
	# more the points scatter around the parabola. y is the quadratic x**2 plus
	# that noise, so the relationship between x and y is non-linear.
	# Both operators must be spelled out: `**` for the square and `*` for the
	# noise scale (without them the line is not valid Python).
	y = x**2 + np.random.randn(20) * 3
	# Closed-form least-squares estimates of slope w and intercept b
	w = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean())**2)
	b = y.mean() - w * x.mean()
	# Fitted values for the training inputs
	y_pred = w * x + b
	# The slope uses the same ` .1f` spec as the intercept (sign-space flag,
	# one decimal); a spec of ` 1f` would mean width 1 with 6 default decimals.
	print(f"Phương trình không tuyến tính: y={w: .1f}x + {b: .1f}")
	print(f"Dự đoán chính xác:", y_pred)
	# Chart: the straight line drawn over the quadratic data
	plt.scatter(x,y, color='red', label='Real Data with no linear')
	plt.plot(x,y_pred, color='blue', label='Predicted Data with no linear')
	plt.xlabel('X - Axis')
	plt.ylabel('Y - Axis')
	plt.title(' No Linear Regression')
	plt.legend()
	plt.show()

	# 5. Random data, to practice reading goodness of fit (low R**2)
	import numpy as np
	import matplotlib.pyplot as plt
	# Random data; the seed makes the draws reproducible
	np.random.seed(2)
	# np.random.randint(low, high, size): `size` is the number of elements to generate
	x = np.random.randint(1,100,20)
	y = np.random.randint(1,100,20)

	# Closed-form least-squares estimates of slope w and intercept b
	w = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean())**2)
	b = y.mean() - w * x.mean()
	# Fitted values for the training inputs
	y_pred = w * x + b
	# The slope uses the same ` .1f` spec as the intercept (sign-space flag,
	# one decimal); a spec of ` 1f` would mean width 1 with 6 default decimals.
	print(f"Phương trình tuyến tính + Random Data: y={w: .1f}x + {b: .1f}")
	print(f"Dự đoán chính xác:", y_pred)
	# Chart: the fitted line drawn over the independently drawn random points
	plt.scatter(x,y, color='red', label='Real Data + Random')
	plt.plot(x,y_pred, color='blue', label='Predicted Data + Random')
	plt.xlabel('X - Axis')
	plt.ylabel('Y - Axis')
	plt.title('Linear Regression With Random Data')
	plt.legend()
	plt.show()