Packages

import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Default size (width, height in inches) for every figure created afterwards.
plt.rc('figure', figsize=(10, 6))

Load the dataset

Here, we will use the Cats and Dogs dataset from Kaggle, which is a binary classification problem. For simplicity, we use a filtered version of the dataset that contains only a subset of the images.

# Root folder of the filtered cats-and-dogs dataset, relative to the current
# working directory.
base_dir = './dataset/cats_and_dogs_filtered'
os.listdir(base_dir)
# NOTE(review): the next line appears to be the recorded notebook output of the
# listing above, not a statement with any effect.
['vectorize.py', 'validation', 'train']
# Paths of the training and validation splits inside base_dir.
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'validation')
os.listdir(train_dir)
# NOTE(review): presumably the recorded output of the listing above.
['dogs', 'cats']
os.listdir(val_dir)
# NOTE(review): presumably the recorded output of the listing above.
['dogs', 'cats']

To use ImageDataGenerator in TensorFlow, the folder structure must be organized hierarchically, with one sub-folder per class label inside each split. For example:

  • train
    • label_1
    • label_2
    • ...
  • val
    • label_1
    • label_2
    • ...

For convenience, we prepare the directory paths of each class in advance.

# Per-class image folders inside the training and validation splits.
train_cat_dir = os.path.join(train_dir, 'cats')
train_dog_dir = os.path.join(train_dir, 'dogs')
val_cat_dir = os.path.join(val_dir, 'cats')
val_dog_dir = os.path.join(val_dir, 'dogs')

# os.listdir returns entries in arbitrary order, so the names are sorted to
# make any selection by index or slice reproducible across runs and machines.
train_dog_fname = sorted(os.listdir(train_dog_dir))
train_cat_fname = sorted(os.listdir(train_cat_dir))

Check the sample images

# Grid layout of the preview: nrows x ncols thumbnails in total.
nrows = 4
ncols = 4

# Half of the grid cells show cats and the other half dogs, so the number of
# pictures per class is derived from the grid size instead of being
# hard-coded (it is 8 for the 4 x 4 grid).
n_per_class = (nrows * ncols) // 2

# Offset into the file-name lists; it is moved past the batch displayed below.
pic_idx = 0

# Take the current figure and resize it so each cell gets about 4 x 4 inches.
fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)

pic_idx += n_per_class
batch = slice(pic_idx - n_per_class, pic_idx)
next_cat_pic = [os.path.join(train_cat_dir, fname) for fname in train_cat_fname[batch]]
next_dog_pic = [os.path.join(train_dog_dir, fname) for fname in train_dog_fname[batch]]

# Cats fill the first cells of the grid, dogs the remaining ones.
for i, img_path in enumerate(next_cat_pic + next_dog_pic):
    # Subplot positions are 1-based, hence i + 1.
    sp = plt.subplot(nrows, ncols, i + 1)
    sp.axis('off')  # hide the axis lines, ticks and labels

    img = mpimg.imread(img_path)
    # Draw on the axes object directly rather than on pyplot's implicit
    # "current axes".
    sp.imshow(img)

plt.show()