Matplotlib Tutorial
This post covers the basic usage of matplotlib (especially pyplot) in various ways. It is a summary of the supplementary lecture notes of "Probability and Statistics in Data Science using Python", offered by UCSD as DSE210x.
import matplotlib.pyplot as plt
import numpy as np
Tutorial
This notebook will show you how to draw basic/advanced plots using matplotlib. Please refer to the official page for the details.
# Sample cos(x) on [0, 4*pi) with a step of 0.1.
X = np.arange(0, 4 * np.pi, 0.1)
y = np.cos(X)

# Open a new figure of size (10, 8) and draw the curve on it.
plt.figure(figsize=(10, 8))
plt.plot(X, y)

# Label text accepts LaTeX math written between dollar signs.
plt.xlabel('$x$')
plt.ylabel('$y$');  # trailing ';' keeps a notebook from echoing the return value
# Integer grid 0..9 and two growth curves evaluated on it.
X = np.arange(10)
y_1 = 2 ** X
y_2 = X ** 2

plt.figure(figsize=(10, 8))

# Style each curve through explicit keyword arguments:
# color, line style and marker shape.
plt.plot(X, y_1, color='g', linestyle='--', marker='s', label='$2^x$')
plt.plot(X, y_2, color='r', linestyle='-', marker='o', label='$x^2$')

plt.xlabel('$x$')
plt.ylabel('$y$')

# Let matplotlib choose the legend position.
plt.legend(loc='best');
We can also draw this with positional arguments.
plt.figure(figsize=(10, 8))

# The same styling, passed positionally as one format string that
# combines color, line style and marker (e.g. 'g--s').
plt.plot(X, y_1, 'g--s', label='$2^x$')
plt.plot(X, y_2, 'r-o', label='$x^2$')

plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='best');
# Set the default text sizes through matplotlib's rc parameters.
plt.rc('font', size=10)                     # default text size
plt.rc('axes', titlesize=10, labelsize=12)  # axes title; x and y labels
plt.rc(('xtick', 'ytick'), labelsize=10)    # tick labels on both axes
plt.rc('legend', fontsize=15)               # legend text
# Cubic curve sampled on [-10, 10) with a step of 0.1.
x = np.arange(-10, 10, 0.1)
y = x ** 3

plt.figure(figsize=(10, 8))
plt.plot(x, y, label='$x^3$')

# Font sizes can also be given per call instead of through plt.rc.
plt.xlabel('$x$', fontsize=12)
plt.ylabel('$y$', fontsize=12)
plt.title('$y = x^3$', fontsize=16)  # title text and its font size

plt.legend(loc='upper left')

# Draw grid lines behind the curve's axes.
plt.grid();
# Two curves sampled on [0, 6*pi) with a step of 0.2.
x = np.arange(0, 6 * np.pi, 0.2)
y_1 = np.cos(x)
y_2 = np.sin(2 * x)

plt.figure(figsize=(10, 8))

# Top panel of a 2x1 grid: y = cos(x).
# Raw strings keep the LaTeX backslashes literal; '\c' and '\s' are not
# valid Python escape sequences and trigger a warning in plain strings.
plt.subplot(2, 1, 1)
plt.plot(x, y_1, label=r'$\cos(x)$')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='best')

# Bottom panel: y = sin(2x).
plt.subplot(2, 1, 2)
plt.plot(x, y_2, label=r'$\sin(2x)$')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='best')

plt.show()
We can also create all the subplots at once and then draw on each axis object directly.
# Two curves sampled on [0, 6*pi) with a step of 0.2.
x = np.arange(0, 6 * np.pi, 0.2)
y_1 = np.cos(x)
y_2 = np.sin(2 * x)

# Create the figure and a 2x1 array of axes in one call.
fig, ax = plt.subplots(2, 1, figsize=(10, 8))

# Top axis: y = cos(x).
# Raw strings keep the LaTeX backslashes literal; '\c' and '\s' are not
# valid Python escape sequences and trigger a warning in plain strings.
ax[0].plot(x, y_1, label=r'$\cos(x)$')
ax[0].set_xlabel('$x$')
ax[0].set_ylabel('$y$')
ax[0].legend(loc='best')

# Bottom axis: y = sin(2x).
ax[1].plot(x, y_2, label=r'$\sin(2x)$')
ax[1].set_xlabel('$x$')
ax[1].set_ylabel('$y$')
ax[1].legend(loc='best')

plt.show()
When several subplots use the same scale, they can share an axis (the x axis or the y axis).
# Three curves on a common grid; the third is the sum of the first two.
x = np.arange(0, 6 * np.pi, 0.2)
y_1 = np.cos(x)
y_2 = np.sin(2 * x)
y_3 = y_1 + y_2

# Three stacked panels that share a single x axis.
fig, axs = plt.subplots(3, 1, sharex=True, figsize=(10, 8))
for panel, curve in zip(axs, (y_1, y_2, y_3)):
    panel.plot(x, curve)
    panel.set_ylabel('$y$')

# The x label is set on the bottom panel only.
axs[2].set_xlabel('$x$')

plt.show()
# Integers 0..6, i.e. x in [0, 7); the bar heights equal the positions.
x = np.arange(0, 7, 1)
y = x

plt.figure(figsize=(10, 8))

# One bar per x position.
plt.bar(x, y, label='$x$')

plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend(loc='upper left')

plt.show()
# Per-country data; each list is plotted against `countries`.
calories = [
    380.70, 420.98, 454.91, 406.45, 446.16,
    498.08, 504.54, 459.05, 459.55, 484.79,
]
countries = [
    'India', 'Japan', 'Korea', 'China', 'Thai',
    'Italy', 'France', 'Greece', 'Mexico', 'US',
]
obesity_rates = [3.9, 4.3, 4.7, 6.2, 10, 19.9, 21.6, 24.9, 28.9, 36.2]

fig, ax1 = plt.subplots(figsize=(10, 8))

# Left y axis: obesity rate as bars, with the axis colored to match.
ax1.bar(countries, obesity_rates, color='C8')
ax1.set_ylabel('obesity rate(%)', color='C8')
ax1.tick_params(axis='y', labelcolor='C8')

# Right y axis: twinx() creates a second y axis that shares the x axis,
# used here for the calorie line.
ax2 = ax1.twinx()
ax2.plot(countries, calories, color='C0')
ax2.set_ylabel('calories', color='C0')
ax2.tick_params(axis='y', labelcolor='C0')

plt.show()
Scatter plot
A scatter plot displays values for typically two variables for a set of data. The data are displayed as a collection of points.
Plot $y = 2x + 3 + \epsilon$, where $\epsilon \sim \mathcal{N}(0, 1)$ (also known as Gaussian Noise). The following code makes a scatter plot for all $(x, y)$ pairs.
Note that a scatter plot is widely used to compare the actual data points with a linear regression model.
# Noisy samples of y = 2x + 3 for x in [0, 10) with a step of 0.5.
x = np.arange(0, 10, 0.5)
noise = np.random.randn(x.size)  # one standard normal draw per x value
y = 2 * x + 3 + noise

plt.figure()

# Noisy observations as points, the noise-free line in red on top.
plt.scatter(x, y)
plt.plot(x, 2 * x + 3, color='r')

plt.xlabel('$x$')
plt.ylabel('$y$')

plt.show()
Contour Plot
A contour plot represents a 3-dimensional surface by plotting constant $z$ slices (contours). Given a value for $z$, lines are drawn for connecting the $(x,y)$ coordinates where that $z$ value occurs.
Here, we plot the contours of $J(\mathbf{w})$
$$ J(\mathbf{w}) = (\mathbf{w} - \mathbf{w}_{o})^{T}\mathbf{A}(\mathbf{w} - \mathbf{w}_{o}) $$ where $\mathbf{w}_{o} = \begin{bmatrix} -2 \\ 2 \end{bmatrix}$, $\mathbf{A} = \begin{bmatrix} 2 & 0 \\ 0 & 1 \end{bmatrix}$.
# Plotting window and grid spacing for each axis.
xmin, xmax, xstep = -4, 0, .1
ymin, ymax, ystep = 0, 4, .1

# Quadratic form J(w) = (w - w0)^T A (w - w0).
A = np.array([[2, 0], [0, 1]])
w0 = np.array([-2., 2.]).reshape(2, 1)


def J(x, y):
    """Evaluate (w - w0)^T A (w - w0) at w = (x, y).

    ``x`` and ``y`` may be scalars or arrays that broadcast together.
    Because ``w0`` is stored as a (2, 1) column, ``w0[0]`` and ``w0[1]``
    are length-1 arrays, so a scalar input yields a length-1 array.
    """
    dx = x - w0[0]
    dy = y - w0[1]
    return A[0, 0] * dx ** 2 + (A[0, 1] + A[1, 0]) * dx * dy + A[1, 1] * dy ** 2


def gradient_u(x, y):
    """Return the x component of the gradient of J, ((A + A^T)(w - w0))[0]."""
    dx = x - w0[0]
    dy = y - w0[1]
    return (A[0, 0] * dx + A[0, 1] * dy) + (A[0, 0] * dx + A[1, 0] * dy)


def gradient_v(x, y):
    """Return the y component of the gradient of J, ((A + A^T)(w - w0))[1]."""
    dx = x - w0[0]
    dy = y - w0[1]
    return (A[1, 0] * dx + A[1, 1] * dy) + (A[0, 1] * dx + A[1, 1] * dy)


# Evaluation grid; the stop is pushed out by one step because np.arange
# excludes its stop value.
x, y = np.meshgrid(np.arange(xmin, xmax + xstep, xstep),
                   np.arange(ymin, ymax + ystep, ystep))
z = J(x, y)
fig, ax = plt.subplots(figsize=(7, 7))

# Contour levels are log-spaced between 10**0 and 10**5 (35 values).
ax.contour(x, y, z, levels=np.logspace(0, 5, 35), cmap='jet')

ax.set_xlabel('$x$')
ax.set_ylabel('$y$')

# Clip the view to the requested window.
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

plt.show()
# Coarser grid (spacing 0.2) on which the gradient arrows are drawn.
x1, y1 = np.meshgrid(
    np.arange(xmin, xmax, 0.2),
    np.arange(ymin, ymax, 0.2),
)

# Gradient components of J at every grid point.
u1 = gradient_u(x1, y1)
v1 = gradient_v(x1, y1)

fig, ax = plt.subplots(figsize=(7, 7))

# One arrow per grid point with direction (u1, v1).
ax.quiver(x1, y1, u1, v1)

ax.set_xlabel('$x$')
ax.set_ylabel('$y$')
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

plt.show()
We can also overlay the contour plot on the quiver plot.
fig, ax = plt.subplots(figsize=(7, 7))

# Contours first, then the gradient arrows on the same axes.
ax.contour(x, y, z, levels=np.logspace(0, 5, 35), cmap='jet')
ax.quiver(x1, y1, u1, v1)

ax.set_xlabel('$x$')
ax.set_ylabel('$y$')
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

plt.show()
3D Faces
Besides contour plots, we can also plot 3D surfaces directly using matplotlib.
Plot the 3D surface of a 2D joint Gaussian distribution.
$\boldsymbol{\mu} = \begin{bmatrix} 0 \\ 0 \end{bmatrix} $, $\boldsymbol{\Sigma} = \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix} $.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
# Bivariate normal with zero mean and identity covariance.
Mu = np.array([0, 0])
Cov = np.array([[1, 0], [0, 1]])
rv = multivariate_normal(Mu, Cov)

fig = plt.figure()
# Create the 3D axes through the figure so they are attached to it.
# A bare Axes3D(fig) is no longer added to the figure automatically in
# recent Matplotlib releases, which leaves the figure empty.
ax = fig.add_subplot(projection='3d')

# Evaluation grid over [-10, 10) in both directions, spacing 0.25.
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)

# Stack the coordinates so that pos[i, j] == (X[i, j], Y[i, j]),
# then evaluate the density at every grid point.
pos = np.dstack((X, Y))
Z = rv.pdf(pos)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet)
plt.show()
We can change the location and covariance matrix.
$\boldsymbol{\mu} = \begin{bmatrix} 0 \\ 4 \end{bmatrix} $, $\boldsymbol{\Sigma} = \begin{bmatrix} 5 & 0 \\ 0 & 1 \end{bmatrix} $.
# Bivariate normal centered at (0, 4) with variances 5 and 1.
Mu = np.array([0, 4])
Cov = np.array([[5, 0], [0, 1]])
rv = multivariate_normal(Mu, Cov)

fig = plt.figure()
# Create the 3D axes through the figure so they are attached to it.
# A bare Axes3D(fig) is no longer added to the figure automatically in
# recent Matplotlib releases, which leaves the figure empty.
ax = fig.add_subplot(projection='3d')

# Evaluation grid over [-10, 10) in both directions, spacing 0.25.
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)

# Stack the coordinates so that pos[i, j] == (X[i, j], Y[i, j]),
# then evaluate the density at every grid point.
pos = np.dstack((X, Y))
Z = rv.pdf(pos)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet)
plt.show()
$\boldsymbol{\mu} = \begin{bmatrix} 0 \\ 0 \end{bmatrix} $, $\boldsymbol{\Sigma} = \begin{bmatrix} 10.5 & -9.5 \\ -9.5 & 10.5 \end{bmatrix} $.
# Zero-mean bivariate normal with negatively correlated components.
Mu = np.array([0, 0])
Cov = np.array([[10.5, -9.5], [-9.5, 10.5]])
rv = multivariate_normal(Mu, Cov)

fig = plt.figure()
# Create the 3D axes through the figure so they are attached to it.
# A bare Axes3D(fig) is no longer added to the figure automatically in
# recent Matplotlib releases, which leaves the figure empty.
ax = fig.add_subplot(projection='3d')

# Evaluation grid over [-10, 10) in both directions, spacing 0.25.
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)

# Stack the coordinates so that pos[i, j] == (X[i, j], Y[i, j]),
# then evaluate the density at every grid point.
pos = np.dstack((X, Y))
Z = rv.pdf(pos)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet)
plt.show()