## Packages

import tensorflow as tf
import tensorflow_probability as tfp

import numpy as np
import matplotlib.pyplot as plt

tfd = tfp.distributions
tfpl = tfp.layers

plt.rcParams['figure.figsize'] = (10, 6)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop

print("Tensorflow Version: ", tf.__version__)
print("Tensorflow Probability Version: ", tfp.__version__)

Tensorflow Version:  2.5.0
Tensorflow Probability Version:  0.13.0


## Overview

### Linear model with Sequential API

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
Dense(1, input_shape=(2, ))
])

model.compile(loss='mse', optimizer='rmsprop')
model.fit(X_train, y_train, epochs=10)

# y = <x, w> + b + eps
# eps ~ N(0, sigma^2)

# Likelihood
# theta = (w, b)
# theta^* = argmax_{theta} P(D | theta)


### DistributionLambda Layer in Linear Model

import tensorflow_probability as tfp
tfd = tfp.distributions
tfpl = tfp.layers

model = Sequential([
Dense(1, input_shape=(2, )),
# Final layer includes normal distribution
# Form is a sort of Lambda function, that can give the output as a mean
# in this case, output tensor is go through normal distribution
# And normal distriubiton is trainable
tfpl.DistributionLambda(
lambda t: tfd.Normal(loc=t, scale=1)
)
])


### Loss function for Distritubion Object

Negative Log-Likelihood Loss function

def nll(y_true, y_pred):
# Since y_pred is distribution object, we can call log_prob for sample data
return -y_pred.log_prob(y_true)


### Converting Tensor function

In this case, output will be tensor object, not distribution object.

model = Sequential([
Dense(1, input_shape=(2, )),
tfpl.DistributionLambda(
lambda t:tfd.Normal(loc=t, scale=1),
convert_to_tensor_fn=tfd.Distribution.sample
)
])
# tfd.Distribution.mean
# tfd.Distribution.mode


### Example

model = tf.keras.Sequential([
tf.keras.layers.Dense(8, input_shape=(12, )),
tfpl.DistributionLambda(lambda t: tfd.Bernoulli(logits=t))
])

model(np.ones((3, 12), dtype=np.float32)).sample(5)

<tf.Tensor: shape=(5, 3, 8), dtype=int32, numpy=
array([[[0, 0, 0, 1, 1, 1, 1, 0],
[0, 0, 0, 1, 1, 1, 1, 0],
[0, 0, 0, 1, 1, 1, 1, 1]],

[[0, 0, 0, 1, 0, 1, 1, 0],
[0, 0, 0, 1, 0, 1, 1, 1],
[1, 0, 0, 1, 0, 1, 1, 0]],

[[0, 0, 0, 1, 0, 0, 1, 1],
[1, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 0]],

[[1, 0, 0, 1, 1, 1, 0, 1],
[0, 0, 0, 1, 1, 1, 0, 1],
[0, 0, 0, 1, 1, 1, 1, 1]],

[[1, 1, 0, 0, 1, 1, 0, 0],
[1, 1, 0, 1, 1, 1, 1, 0],
[0, 0, 0, 1, 1, 1, 1, 0]]], dtype=int32)>

Its input layer has 8 nodes and passes through the distributionLambda layer. So its event_shape has 8. And input data is entered as 3 batches. We generate 5 samples. As a result, the output tensor shape will be (5, 3, 8).

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense (Dense)                (None, 8)                 104
_________________________________________________________________
distribution_lambda (Distrib multiple                  0
=================================================================
Total params: 104
Trainable params: 104
Non-trainable params: 0
_________________________________________________________________


## Tutorial

### Create a probabilistic model using the DistributionLambda layer

Create a model whose first layer represents:

$$y = \text{sigmoid}(x) = \frac{1}{1 + \exp(-x)}.$$
model = Sequential([
Dense(units=1, input_shape=(1, ), activation='sigmoid',
kernel_initializer=tf.constant_initializer(1),
bias_initializer=tf.constant_initializer(0))
], name='deterministic_sigmoid_model')

model.summary()

Model: "deterministic_sigmoid_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_4 (Dense)              (None, 1)                 2
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________

X = np.linspace(-5, 5, 100)
plt.scatter(X, model.predict(X), alpha=0.4)
plt.plot(X, 1 / (1 + np.exp(-X)), color='r', alpha=0.8)
plt.title('Deterministic Sigmoid function')
plt.show()

x = np.array([[0]])
x

array([[0]])
y = model(x)
y

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.5]], dtype=float32)>
for _ in range(5):
print(model.predict(x))

[[0.5]]
[[0.5]]
[[0.5]]
[[0.5]]
[[0.5]]

model = Sequential([
Dense(units=1, input_shape=(1, ), activation='sigmoid',
kernel_initializer=tf.constant_initializer(1),
bias_initializer=tf.constant_initializer(0)),
tfpl.DistributionLambda(lambda t: tfd.Bernoulli(probs=t),
convert_to_tensor_fn=tfd.Distribution.sample)
], name='probabilistic_sigmoid_model')

model.summary()

Model: "probabilistic_sigmoid_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_5 (Dense)              (None, 1)                 2
_________________________________________________________________
distribution_lambda_1 (Distr multiple                  0
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________

X = np.linspace(-5, 5, 100)
plt.scatter(X, model.predict(X), alpha=0.4)
plt.plot(X, 1 / (1 + np.exp(-X)), color='r', alpha=0.8)
plt.title('Probabilistic Sigmoid function')
plt.show()

x = np.array([[0]])
x

array([[0]])
y = model(x)
y

<tfp.distributions.Bernoulli 'probabilistic_sigmoid_model_distribution_lambda_1_Bernoulli' batch_shape=[1, 1] event_shape=[] dtype=int32>
for _ in range(5):
print(model.predict(x))

[[0]]
[[0]]
[[1]]
[[0]]
[[1]]


### Use the forward model to create probabilistic training data

X_train = np.linspace(-5, 5, 500)[:, np.newaxis]
y_train = model.predict(X_train)

plt.scatter(X_train, y_train, alpha=0.04, color='blue', label='samples')
plt.plot(X_train, model(X_train).mean().numpy().flatten(), color='red', alpha=0.8, label='mean')
plt.legend(loc='best')
plt.show()


### Create a new probabilistic model with the wrong weights

model_untrained = Sequential([
Dense(units=1, input_shape=(1, ), activation='sigmoid',
kernel_initializer=tf.constant_initializer(2),
bias_initializer=tf.constant_initializer(2)),
tfpl.DistributionLambda(lambda t:tfd.Bernoulli(probs=t),
convert_to_tensor_fn=tfd.Distribution.sample)
], name='wrong_probabilistic_model')

model.summary()

Model: "probabilistic_sigmoid_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_5 (Dense)              (None, 1)                 2
_________________________________________________________________
distribution_lambda_1 (Distr multiple                  0
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________


### Train the new model with the negative log likelihoood

def nll(y_true, y_pred):
return -y_pred.log_prob(y_true)

model_untrained.compile(loss=nll, optimizer=RMSprop(learning_rate=1e-2))

epochs = [0]
training_weights = [model_untrained.weights[0].numpy()[0, 0]]
training_bias = [model_untrained.weights[1].numpy()[0]]

for e in range(100):
model_untrained.fit(X_train, y_train, epochs=1, verbose=False)
epochs.append(e)

training_weights.append(model_untrained.weights[0].numpy()[0, 0])
training_bias.append(model_untrained.weights[1].numpy()[0])

plt.plot(epochs, training_weights, label='weight')
plt.plot(epochs, training_bias, label='bias')
plt.axhline(y=1, label='true_weight', color='k', linestyle=':')
plt.axhline(y=0, label='true_bias', color='k', linestyle='--')
plt.xlabel('Epoch')
plt.legend(loc='best')
plt.show()


This make sense that the training data is not perfect. It contains some errors itself. This kind of uncertainty is called epistemic uncertainty. We will cover it in later notebook.