The DistributionLambda Layer
In this post, we introduce the most direct way of incorporating a distribution object into a deep learning model: the DistributionLambda layer. This post is a summary of the lecture "Probabilistic Deep Learning with TensorFlow 2" from Imperial College London.
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt
tfd = tfp.distributions
tfpl = tfp.layers
plt.rcParams['figure.figsize'] = (10, 6)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop
print("Tensorflow Version: ", tf.__version__)
print("Tensorflow Probability Version: ", tfp.__version__)
# A standard deterministic linear model trained with mean squared error
# (X_train and y_train stand for some training inputs and targets)
model = Sequential([
    Dense(1, input_shape=(2, ))
])
model.compile(loss='mse', optimizer='rmsprop')
model.fit(X_train, y_train, epochs=10)
# y = <x, w> + b + eps
# eps ~ N(0, sigma^2)
# Likelihood
# theta = (w, b)
# theta^* = argmax_{theta} P(D | theta)
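# Equivalently, maximizing the likelihood is the same as minimizing the
# negative log-likelihood, which is the loss function used later in this post:
# theta^* = argmin_{theta} -sum_i log P(y_i | x_i, theta)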
model = Sequential([
    Dense(1, input_shape=(2, )),
    # The final layer wraps the output of the Dense layer in a Normal distribution:
    # the lambda receives the output tensor t of the previous layer and uses it
    # as the mean (with a fixed scale of 1), so the distribution's location is
    # learned through the trainable Dense layer.
    tfpl.DistributionLambda(
        lambda t: tfd.Normal(loc=t, scale=1)
    )
])
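Calling this model now returns a distribution object rather than a plain tensor, so methods such as mean(), sample() and log_prob() are available directly on the output. A minimal check (a sketch assuming the model defined above; x_dummy and y_dist are illustrative names):

x_dummy = np.array([[1., 2.]], dtype=np.float32)  # a single input with 2 features
y_dist = model(x_dummy)                           # a Normal distribution, not a tensor
print(y_dist)                                     # Normal distribution with batch shape (1, 1)
print(y_dist.mean())                              # the location computed by the Dense layer
print(y_dist.sample())                            # a random draw from that Normal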
The convert_to_tensor_fn Argument
The layer's output is a distribution object, but whenever a plain tensor is required (for example by model.predict or by a downstream layer), the convert_to_tensor_fn argument determines how the distribution is converted into a tensor. The default is tfd.Distribution.sample, which is passed explicitly below.
model = Sequential([
    Dense(1, input_shape=(2, )),
    tfpl.DistributionLambda(
        lambda t: tfd.Normal(loc=t, scale=1),
        convert_to_tensor_fn=tfd.Distribution.sample
    )
])

# Other possible choices for convert_to_tensor_fn:
# tfd.Distribution.mean
# tfd.Distribution.mode
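To see when this conversion happens, here is a small sketch (dist, sampled and x_dummy are illustrative names): the model output itself is still a distribution, and convert_to_tensor_fn is applied whenever a plain tensor is needed, for example by tf.convert_to_tensor or model.predict.

x_dummy = np.zeros((1, 2), dtype=np.float32)
dist = model(x_dummy)                  # still a distribution object
sampled = tf.convert_to_tensor(dist)   # conversion applies convert_to_tensor_fn (a sample here)
print(sampled.shape)                   # (1, 1)
print(model.predict(x_dummy))          # predict also returns the converted (sampled) tensor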
model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, input_shape=(12, )),
    tfpl.DistributionLambda(lambda t: tfd.Bernoulli(logits=t))
])
model(np.ones((3, 12), dtype=np.float32)).sample(5)
The Dense layer has 8 units, so for a batch of 3 inputs (each with 12 features) the DistributionLambda layer returns a Bernoulli distribution with batch shape (3, 8): 8 independent Bernoulli distributions for each of the 3 inputs. Drawing 5 samples therefore gives an output tensor of shape (5, 3, 8).
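A quick way to verify this (a sketch using the model just defined) is to inspect the batch and event shapes of the returned distribution:

dist = model(np.ones((3, 12), dtype=np.float32))
print(dist.batch_shape)        # (3, 8): 3 inputs, 8 independent Bernoulli variables each
print(dist.event_shape)        # (): each Bernoulli is scalar-valued
print(dist.sample(5).shape)    # (5, 3, 8)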
model.summary()
model = Sequential([
    Dense(units=1, input_shape=(1, ), activation='sigmoid',
          kernel_initializer=tf.constant_initializer(1),
          bias_initializer=tf.constant_initializer(0))
], name='deterministic_sigmoid_model')
model.summary()
X = np.linspace(-5, 5, 100)[:, np.newaxis]
plt.scatter(X, model.predict(X), alpha=0.4)
plt.plot(X, 1 / (1 + np.exp(-X)), color='r', alpha=0.8)
plt.title('Deterministic Sigmoid function')
plt.show()
x = np.array([[0]])
x
y = model(x)
y
# The deterministic model returns the same value every time: sigmoid(0) = 0.5
for _ in range(5):
    print(model.predict(x))
model = Sequential([
    Dense(units=1, input_shape=(1, ), activation='sigmoid',
          kernel_initializer=tf.constant_initializer(1),
          bias_initializer=tf.constant_initializer(0)),
    tfpl.DistributionLambda(lambda t: tfd.Bernoulli(probs=t),
                            convert_to_tensor_fn=tfd.Distribution.sample)
], name='probabilistic_sigmoid_model')
model.summary()
X = np.linspace(-5, 5, 100)[:, np.newaxis]
plt.scatter(X, model.predict(X), alpha=0.4)
plt.plot(X, 1 / (1 + np.exp(-X)), color='r', alpha=0.8)
plt.title('Probabilistic Sigmoid function')
plt.show()
x = np.array([[0]])
x
y = model(x)
y
# Each call draws a new sample from Bernoulli(0.5), so the output varies between 0 and 1
for _ in range(5):
    print(model.predict(x))
X_train = np.linspace(-5, 5, 500)[:, np.newaxis]
y_train = model.predict(X_train)
plt.scatter(X_train, y_train, alpha=0.04, color='blue', label='samples')
plt.plot(X_train, model(X_train).mean().numpy().flatten(), color='red', alpha=0.8, label='mean')
plt.legend(loc='best')
plt.show()
model_untrained = Sequential([
    Dense(units=1, input_shape=(1, ), activation='sigmoid',
          kernel_initializer=tf.constant_initializer(2),
          bias_initializer=tf.constant_initializer(2)),
    tfpl.DistributionLambda(lambda t: tfd.Bernoulli(probs=t),
                            convert_to_tensor_fn=tfd.Distribution.sample)
], name='wrong_probabilistic_model')
model_untrained.summary()
# Negative log-likelihood loss: y_pred is the distribution output by the model
def nll(y_true, y_pred):
    return -y_pred.log_prob(y_true)
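As a sanity check (purely illustrative numbers), this loss is just the negative log-probability that the predicted Bernoulli distribution assigns to the observed label:

dist = tfd.Bernoulli(probs=0.8)
print(-dist.log_prob(1).numpy())   # -log(0.8) ≈ 0.223
print(-dist.log_prob(0).numpy())   # -log(0.2) ≈ 1.609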
model_untrained.compile(loss=nll, optimizer=RMSprop(learning_rate=1e-2))
epochs = [0]
training_weights = [model_untrained.weights[0].numpy()[0, 0]]
training_bias = [model_untrained.weights[1].numpy()[0]]

# Train one epoch at a time, recording the weight and bias after each epoch
for e in range(100):
    model_untrained.fit(X_train, y_train, epochs=1, verbose=False)
    epochs.append(e + 1)
    training_weights.append(model_untrained.weights[0].numpy()[0, 0])
    training_bias.append(model_untrained.weights[1].numpy()[0])
plt.plot(epochs, training_weights, label='weight')
plt.plot(epochs, training_bias, label='bias')
plt.axhline(y=1, label='true_weight', color='k', linestyle=':')
plt.axhline(y=0, label='true_bias', color='k', linestyle='--')
plt.xlabel('Epoch')
plt.legend(loc='best')
plt.show()
This makes sense: the training data is itself noisy, since the labels were sampled from a Bernoulli distribution, so with a finite data set the estimated weight and bias fluctuate around the true values rather than converging to them exactly. This uncertainty about the model's parameters is called epistemic uncertainty, and we will cover it in a later notebook.