Straight line fit in Python

$$ \newcommand{\eg}{{\it e.g.}} \newcommand{\ie}{{\it i.e.}} \newcommand{\argmin}{\operatornamewithlimits{argmin}} \newcommand{\mc}{\mathcal} \newcommand{\mb}{\mathbb} \newcommand{\mf}{\mathbf} \newcommand{\minimize}{{\text{minimize}}} \newcommand{\diag}{{\text{diag}}} \newcommand{\cond}{{\text{cond}}} \newcommand{\rank}{{\text{rank }}} \newcommand{\range}{{\mathcal{R}}} \newcommand{\null}{{\mathcal{N}}} \newcommand{\tr}{{\text{trace}}} \newcommand{\dom}{{\text{dom}}} \newcommand{\dist}{{\text{dist}}} \newcommand{\R}{\mathbf{R}} \newcommand{\SM}{\mathbf{S}} \newcommand{\ball}{\mathcal{B}} \newcommand{\bmat}[1]{\begin{bmatrix}#1\end{bmatrix}} $$

EE787: Machine learning, Kyung Hee University.
Jong-Han Kim (jonghank@khu.ac.kr)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Problem size: n samples and d = 2 parameters (intercept and slope).
n = 50
d = 2

# Seed NumPy's global generator so that "Restart & Run All" regenerates
# exactly the same synthetic data set (and hence the same fit) every time.
SEED = 787
np.random.seed(SEED)

# Standard deviation of the Gaussian noise added to the outputs.
noise_std = 0.2

# Ground-truth parameters [intercept, slope], drawn from a standard normal.
theta_true = np.random.randn(d)

# Inputs u and noisy outputs v = theta_true[0] + theta_true[1]*u + noise.
u = np.random.randn(n)
v = theta_true[0] + theta_true[1]*u + noise_std*np.random.randn(n)

# Scatter plot of the raw (u, v) samples: markers only, no connecting line.
plt.figure(dpi=100)
plt.plot(u, v, marker="o", linestyle="None", alpha=0.5, label="Raw data")
plt.grid()
plt.ylabel("v")
plt.xlabel("u")
plt.legend()
plt.show()
In [2]:
# Design matrix: a column of ones (intercept term) next to the inputs u.
ones_column = np.ones((n, 1))
u_column = u.reshape(n, 1)
X = np.hstack((ones_column, u_column))
y = v

# Least-squares estimate of theta; the solution is the first entry of the
# tuple returned by lstsq.
lstsq_result = np.linalg.lstsq(X, y, rcond=None)
theta_opt = lstsq_result[0]

# Fitted values on the training inputs and the root-mean-square error,
# computed as ||y - y_hat|| / sqrt(n).
y_hat = X @ theta_opt
residual = y - y_hat
RMSE = np.linalg.norm(residual) / np.sqrt(n)

# Report the true parameters, the estimate, and the fit error.
print(f"theta_true: {theta_true}")
print(f"theta_opt: {theta_opt}")
print(f"RMSE: {RMSE}")
theta_true: [ 0.11200996 -0.51304398]
theta_opt: [ 0.12606031 -0.53167957]
RMSE: 0.18437662477308264
In [3]:
# Dense grid of test inputs on [-3, 3) with step 0.01, used to draw the fit.
u_test = np.arange(-3,3,0.01)
n_test = len(u_test)

# Same feature layout as the training design matrix: [1, u].
X_test = np.hstack((np.ones((n_test,1)),u_test.reshape(n_test,1)))

# Model predictions on the grid; v_test is the same array under the (u, v)
# naming used for the data.
y_test = X_test@theta_opt
v_test = y_test

plt.figure(dpi=100)
plt.plot(u, y, "o", alpha=0.5, label="Raw data")
# Draw the fitted line on the sorted, evenly spaced test grid computed above
# rather than on the unsorted training inputs, so the grid is actually used
# and the line spans the full [-3, 3) range.
plt.plot(u_test, v_test, label="Straight line fit")
plt.xlabel("u")
plt.ylabel("y")
plt.legend()
plt.grid()
plt.show()
In [ ]: