Matplotlib Tutorial
This post covers the basic usage of matplotlib (especially pyplot) in various ways. It is a summary of a supplementary lecture note from "Probability and Statistics in Data Science using Python", offered by UCSD (DSE210x).
import matplotlib.pyplot as plt
import numpy as np
Tutorial
This notebook shows how to draw basic and advanced plots using matplotlib. Please refer to the official page for details.
# Sample y = cos(x) on [0, 4*pi) with a spacing of 0.1.
X = np.arange(0, 4 * np.pi, 0.1)
y = np.cos(X)

# Draw the curve on a new 10x8-inch figure.
plt.figure(figsize=(10, 8))
plt.plot(X, y)

# Label text may contain LaTeX-style math between dollar signs.
plt.xlabel('$x$')
plt.ylabel('$y$');  # trailing semicolon keeps the notebook from echoing the return value
# Integer grid 0..9 and two curves evaluated on it.
X = np.arange(0, 10, 1)
y_1 = 2 ** X
y_2 = X ** 2

plt.figure(figsize=(10, 8))

# Colour, line style and marker are each passed as a keyword argument.
plt.plot(
    X, y_1,
    color='g', linestyle='--', marker='s',
    label='$2^x$',
)
plt.plot(
    X, y_2,
    color='r', linestyle='-', marker='o',
    label='$x^2$',
)

plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='best');  # trailing semicolon keeps the notebook from echoing the return value
We can also draw this with positional arguments.
plt.figure(figsize=(10, 8))

# Colour, line style and marker are packed into a single positional format
# string (e.g. 'g--s'), one per curve.
for series, fmt, name in ((y_1, 'g--s', '$2^x$'), (y_2, 'r-o', '$x^2$')):
    plt.plot(X, series, fmt, label=name)

plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='best');  # trailing semicolon keeps the notebook from echoing the return value
# Default text sizes, set through matplotlib's rc configuration.
plt.rc('font', size=10)                      # default text size
plt.rc('axes', titlesize=10, labelsize=12)   # axes title / x and y labels
for tick_group in ('xtick', 'ytick'):
    plt.rc(tick_group, labelsize=10)         # tick labels
plt.rc('legend', fontsize=15)                # legend text
# Sample y = x^3 on [-10, 10) with a spacing of 0.1.
x = np.arange(-10, 10, 0.1)
y = x ** 3

plt.figure(figsize=(10, 8))
plt.plot(x, y, label='$x^3$')

# Font sizes can also be given per call instead of through plt.rc.
label_size = 12
plt.xlabel('$x$', fontsize=label_size)
plt.ylabel('$y$', fontsize=label_size)
plt.title('$y = x^3$', fontsize=16)   # title text and its font size

plt.legend(loc='upper left')

# Turn on the background grid.
plt.grid();
# Two periodic signals sampled on [0, 6*pi) with a spacing of 0.2.
x = np.arange(0, 6 * np.pi, 0.2)
y_1 = np.cos(x)
y_2 = np.sin(2 * x)

plt.figure(figsize=(10, 8))

# Upper panel (slot 1 of a 2-row, 1-column grid): y = cos(x).
plt.subplot(2, 1, 1)
# Raw string: "\c" is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The label text itself is unchanged.
plt.plot(x, y_1, label=r'$\cos(x)$')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='best')

# Lower panel (slot 2): y = sin(2x). Raw string for the same reason ("\s").
plt.subplot(2, 1, 2)
plt.plot(x, y_2, label=r'$\sin(2x)$')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='best')

plt.show()
We can also create all the subplots at once and access each of them through its own axes object.
# Two periodic signals sampled on [0, 6*pi) with a spacing of 0.2.
x = np.arange(0, 6 * np.pi, 0.2)
y_1 = np.cos(x)
y_2 = np.sin(2 * x)

# Create both panels up front; ax[0] is the upper one and ax[1] the lower one.
fig, ax = plt.subplots(2, 1, figsize=(10, 8))

# Upper panel: y = cos(x).
# Raw string: "\c" is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The label text itself is unchanged.
ax[0].plot(x, y_1, label=r'$\cos(x)$')
ax[0].set_xlabel('$x$')
ax[0].set_ylabel('$y$')
ax[0].legend(loc='best')

# Lower panel: y = sin(2x). Raw string for the same reason ("\s").
ax[1].plot(x, y_2, label=r'$\sin(2x)$')
ax[1].set_xlabel('$x$')
ax[1].set_ylabel('$y$')
ax[1].legend(loc='best')

plt.show()
When several subplots use the same scale, they can share an axis (the x axis or the y axis).
# Two periodic signals and their sum, sampled on [0, 6*pi).
x = np.arange(0, 6 * np.pi, 0.2)
y_1 = np.cos(x)
y_2 = np.sin(2 * x)
y_3 = y_1 + y_2

# Three stacked panels whose x axes are shared (sharex=True).
fig, axs = plt.subplots(3, 1, sharex=True, figsize=(10, 8))
for panel, signal in zip(axs, (y_1, y_2, y_3)):
    panel.plot(x, signal)
    panel.set_ylabel('$y$')

# Only the bottom panel gets an x label.
axs[2].set_xlabel('$x$')
plt.show()
# Integers 0..6, i.e. x in [0, 7); each bar's height equals its position.
x = np.arange(0, 7, 1)
y = x

plt.figure(figsize=(10, 8))
plt.bar(x, y, label='$x$')

plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='upper left')
plt.show()
# Three lists aligned by position: entry i of each belongs to countries[i].
calories = [380.70, 420.98, 454.91, 406.45, 446.16, 498.08, 504.54, 459.05, 459.55, 484.79]
countries = ['India', 'Japan', 'Korea', 'China', 'Thai', 'Italy', 'France', 'Greece', 'Mexico', 'US']
obesity_rates = [3.9, 4.3, 4.7, 6.2, 10, 19.9, 21.6, 24.9, 28.9, 36.2]

fig, ax1 = plt.subplots(figsize=(10, 8))

# First y axis: obesity rate as bars, with its label and ticks in the bar colour.
bar_color = 'C8'
ax1.bar(countries, obesity_rates, color=bar_color)
ax1.set_ylabel('obesity rate(%)', color=bar_color)
ax1.tick_params(axis='y', labelcolor=bar_color)

# Second y axis: twinx() creates another axes on the same x axis, used here for
# the calories line, with its label and ticks in the line colour.
line_color = 'C0'
ax2 = ax1.twinx()
ax2.plot(countries, calories, color=line_color)
ax2.set_ylabel('calories', color=line_color)
ax2.tick_params(axis='y', labelcolor=line_color)

plt.show()
Scatter plot
A scatter plot displays values for typically two variables for a set of data. The data are displayed as a collection of points.
Plot $y = 2x + 3 + \epsilon$, where $\epsilon \sim \mathcal{N}(0, 1)$ (also known as Gaussian Noise). The following code makes a scatter plot for all $(x, y)$ pairs.
Note that a scatter plot is widely used to compare the observed data points with a fitted linear regression model.
# Noisy samples of the line y = 2x + 3.
x = np.arange(0, 10, 0.5)          # sample points in [0, 10)
noise = np.random.randn(x.size)    # one standard-normal draw per sample
y = 2 * x + 3 + noise

# Noise-free line, drawn over the samples for comparison.
line = 2 * x + 3

plt.figure()
plt.scatter(x, y)
plt.plot(x, line, color='r')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.show()
Contour Plot
A contour plot represents a 3-dimensional surface by plotting constant $z$ slices (contours). Given a value for $z$, lines are drawn for connecting the $(x,y)$ coordinates where that $z$ value occurs.
Here we plot the contours of $J(\mathbf{w})$:
$$ J(\mathbf{w}) = (\mathbf{w} - \mathbf{w}_{o})^{T}\mathbf{A}(\mathbf{w} - \mathbf{w}_{o}) $$ where $\mathbf{w}_{o} = \begin{bmatrix} -2 \\ 2 \end{bmatrix}$ and $\mathbf{A} = \begin{bmatrix} 2 & 0 \\ 0 & 1 \end{bmatrix}$.
# Plot window and sampling step along each axis.
xmin, xmax, xstep = -4, 0, .1
ymin, ymax, ystep = 0, 4, .1

# Matrix A and centre w0 of the quadratic form J(w) = (w - w0)^T A (w - w0).
A = np.array([[2, 0], [0, 1]])
w0 = np.array([-2., 2.]).reshape(2, 1)


def J(x, y):
    """Evaluate J(w) = (w - w0)^T A (w - w0) at w = (x, y), element-wise.

    ``x`` and ``y`` may be scalars or arrays that broadcast together. Because
    ``w0[0]`` and ``w0[1]`` are length-1 arrays, scalar inputs give a length-1
    array rather than a plain float.
    """
    dx = x - w0[0]
    dy = y - w0[1]
    return A[0, 0] * dx ** 2 + (A[0, 1] + A[1, 0]) * dx * dy + A[1, 1] * dy ** 2


def gradient_u(x, y):
    """Return the x component of grad J = (A + A^T)(w - w0) at w = (x, y)."""
    dx = x - w0[0]
    dy = y - w0[1]
    # First row of A times (w - w0), plus first row of A^T times (w - w0).
    return (A[0, 0] * dx + A[0, 1] * dy) + (A[0, 0] * dx + A[1, 0] * dy)


def gradient_v(x, y):
    """Return the y component of grad J = (A + A^T)(w - w0) at w = (x, y)."""
    dx = x - w0[0]
    dy = y - w0[1]
    # Second row of A times (w - w0), plus second row of A^T times (w - w0).
    return (A[1, 0] * dx + A[1, 1] * dy) + (A[0, 1] * dx + A[1, 1] * dy)
# Evaluate J on a regular grid over the plot window. The stop value is pushed
# out by one step, presumably so that the upper bound itself is sampled.
x_ticks = np.arange(xmin, xmax + xstep, xstep)
y_ticks = np.arange(ymin, ymax + ystep, ystep)
x, y = np.meshgrid(x_ticks, y_ticks)
z = J(x, y)

# 35 logarithmically spaced contour levels between 10^0 and 10^5.
contour_levels = np.logspace(0, 5, 35)

fig, ax = plt.subplots(figsize=(7, 7))
ax.contour(x, y, z, levels=contour_levels, cmap='jet')

ax.set_xlabel('$x$')
ax.set_ylabel('$y$')

# Clip the view to the plot window.
ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax))

plt.show()
# Coarser grid (spacing 0.2) for the arrows of the gradient field.
arrow_step = 0.2
x1, y1 = np.meshgrid(np.arange(xmin, xmax, arrow_step),
                     np.arange(ymin, ymax, arrow_step))

# Gradient components at every grid point.
u1 = gradient_u(x1, y1)
v1 = gradient_v(x1, y1)

fig, ax = plt.subplots(figsize=(7, 7))
ax.quiver(x1, y1, u1, v1)

ax.set_xlabel('$x$')
ax.set_ylabel('$y$')

# Clip the view to the plot window.
ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax))

plt.show()
We can overlay the contour plot on the quiver plot.
fig, ax = plt.subplots(figsize=(7, 7))

# Contours first, then the gradient arrows, both on the same axes.
contour_levels = np.logspace(0, 5, 35)
ax.contour(x, y, z, levels=contour_levels, cmap='jet')
ax.quiver(x1, y1, u1, v1)

ax.set_xlabel('$x$')
ax.set_ylabel('$y$')

# Clip the view to the plot window.
ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax))

plt.show()
3D Faces
Besides contour plots, we can also plot 3D surfaces directly with matplotlib.
Plot the 3D surface of a 2D joint Gaussian distribution.
$\boldsymbol{\mu} = \begin{bmatrix} 0 \\ 0 \end{bmatrix} $, $\boldsymbol{\Sigma} = \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix} $.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
# Bivariate normal with zero mean and identity covariance.
Mu = np.array([0, 0])
Cov = np.array([[1, 0], [0, 1]])
rv = multivariate_normal(Mu, Cov)

fig = plt.figure()
# Ask the figure for the 3D axes. Constructing Axes3D(fig) directly no longer
# attaches the axes to the figure on current Matplotlib releases, which leaves
# the figure empty.
ax = fig.add_subplot(projection='3d')

# Evaluation grid over [-10, 10) x [-10, 10) with a spacing of 0.25.
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)

# pos[i, j] = (X[i, j], Y[i, j]): the grid coordinates stacked on a last axis.
pos = np.dstack((X, Y))
Z = rv.pdf(pos)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet)
plt.show()
We can change the location and covariance matrix.
$\boldsymbol{\mu} = \begin{bmatrix} 0 \\ 4 \end{bmatrix} $, $\boldsymbol{\Sigma} = \begin{bmatrix} 5 & 0 \\ 0 & 1 \end{bmatrix} $.
# Bivariate normal centred at (0, 4) with variances 5 and 1 and no covariance.
Mu = np.array([0, 4])
Cov = np.array([[5, 0], [0, 1]])
rv = multivariate_normal(Mu, Cov)

fig = plt.figure()
# Ask the figure for the 3D axes. Constructing Axes3D(fig) directly no longer
# attaches the axes to the figure on current Matplotlib releases, which leaves
# the figure empty.
ax = fig.add_subplot(projection='3d')

# Evaluation grid over [-10, 10) x [-10, 10) with a spacing of 0.25.
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)

# pos[i, j] = (X[i, j], Y[i, j]): the grid coordinates stacked on a last axis.
pos = np.dstack((X, Y))
Z = rv.pdf(pos)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet)
plt.show()
$\boldsymbol{\mu} = \begin{bmatrix} 0 \\ 0 \end{bmatrix} $, $\boldsymbol{\Sigma} = \begin{bmatrix} 10.5 & -9.5 \\ -9.5 & 10.5 \end{bmatrix} $.
# Zero-mean bivariate normal with negative covariance between the components.
Mu = np.array([0, 0])
Cov = np.array([[10.5, -9.5], [-9.5, 10.5]])
rv = multivariate_normal(Mu, Cov)

fig = plt.figure()
# Ask the figure for the 3D axes. Constructing Axes3D(fig) directly no longer
# attaches the axes to the figure on current Matplotlib releases, which leaves
# the figure empty.
ax = fig.add_subplot(projection='3d')

# Evaluation grid over [-10, 10) x [-10, 10) with a spacing of 0.25.
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)

# pos[i, j] = (X[i, j], Y[i, j]): the grid coordinates stacked on a last axis.
pos = np.dstack((X, Y))
Z = rv.pdf(pos)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet)
plt.show()
Box plot
In this example we use the California Housing Prices dataset, which contains information from the 1990 California census. `seaborn`, a high-level API built on `matplotlib`, is also used.
A box plot shows the interquartile range, midhinge, range, mid-range, and trimean of the data. Dots beyond the whiskers are outliers.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the housing table and drop every row that has a missing value.
df = pd.read_csv('./dataset/housing.csv').dropna()

# One box per ocean_proximity value, showing the spread of median_house_value.
plt.figure(figsize=(10, 6))
sns.boxplot(
    x='ocean_proximity',
    y='median_house_value',
    data=df,
    palette='viridis',
)
plt.show()
plt.figure(figsize=(10, 6))

# Correlate the numeric columns only. The frame also holds ocean_proximity,
# which the box plot in this file uses as a category (presumably text), and
# pandas 2.0+ raises when DataFrame.corr() meets a non-numeric column.
# select_dtypes keeps this working on older pandas versions as well.
corr_matrix = df.select_dtypes(include='number').corr()

# Annotated heat map of the pairwise correlations, drawn without a colour bar.
sns.heatmap(cbar=False, annot=True, data=corr_matrix, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()
If a value is close to 1, the two variables are strongly positively correlated: when one increases, the other is likely to increase as well. Conversely, if a value is close to -1, the other variable is likely to decrease when one increases.
import ipywidgets as widgets
# The sliders are built explicitly so that continuous_update=False reaches
# them. interact() reads its keyword arguments as widget abbreviations for the
# parameters of f, so passing continuous_update=False to interact() itself does
# not configure the sliders. value=15.5 is the midpoint of the [1, 30] range.
@widgets.interact(
    s1=widgets.FloatSlider(min=1, max=30.0, value=15.5, continuous_update=False),
    s2=widgets.FloatSlider(min=1, max=30.0, value=15.5, continuous_update=False),
)
def f(s1, s2):
    """Plot y = cos(s1 * x) + cos(s2 * x) for x in [0, 20*pi).

    s1 and s2 are the two frequencies, supplied by the sliders.
    """
    plt.figure()
    x = np.arange(0, 20 * np.pi, 0.1)
    y = np.cos(s1 * x) + np.cos(s2 * x)
    plt.plot(x, y)
    plt.show()
Matplotlib Widgets
Matplotlib also has widgets for controlling variables interactively, so if you want to use sliders outside notebooks, `matplotlib` provides an option for you. (This option is only available in a regular Python runtime, not in a Jupyter notebook.)
The following example plots $y = \cos(\omega_1 x) + \cos(\omega_2 x)$, where $\omega_1$ and $\omega_2$ can be controlled by sliders.
from matplotlib.widgets import Slider
# Initial curve: y = cos(3x) + cos(5x), sampled on [0, 20*pi).
x = np.arange(0, 20 * np.pi, 0.1)
y = np.cos(3 * x) + np.cos(5 * x)

fig, ax = plt.subplots()
# Move the bottom and left edges of the plot area inwards; the slider axes are
# placed in the freed space.
plt.subplots_adjust(bottom=0.2, left=0.3)
l, = plt.plot(x, y)

# One thin axes per slider: [left, bottom, width, height] in figure fractions.
om1 = plt.axes([0.25, 0.1, 0.65, 0.03])
om2 = plt.axes([0.25, 0.15, 0.65, 0.03])

# Both sliders range over [1, 30]; they start at the frequencies plotted above.
som1 = Slider(om1, r'$\omega_1$', 1, 30.0, valinit=3)
som2 = Slider(om2, r'$\omega_2$', 1, 30.0, valinit=5)


def update(val):
    """Recompute the curve from the current slider values and redraw it.

    ``val`` is the value passed by the slider that changed. It is not used,
    because both sliders are read directly.
    """
    s1 = som1.val
    s2 = som2.val
    x = np.arange(0, 20 * np.pi, 0.1)
    y = np.cos(s1 * x) + np.cos(s2 * x)
    l.set_data(x, y)
    # Request a redraw explicitly so the new data is shown.
    fig.canvas.draw_idle()


# Run the same callback whenever either slider changes.
som1.on_changed(update)
som2.on_changed(update)
plt.show()