Disclaimer: This Jupyter Notebook contains content generated with the assistance of AI. While every effort has been made to review and validate the outputs, users should independently verify critical information before relying on it. The SELENE notebook repository is constantly evolving. We recommend downloading or pulling the latest version of this notebook from GitHub.

Handwritten Digit Recognition with Artificial Neural Networks (ANNs)¶

Handwritten digit recognition is a classic task in machine learning where the goal is to develop algorithms that can accurately interpret and classify images of handwritten digits (typically 0 through 9). A well-known benchmark dataset for this task is the MNIST dataset, which contains thousands of labeled examples of handwritten digits written by different people. The challenge lies in enabling computers to recognize the variability in individual handwriting styles, sizes, and shapes, despite distortions, noise, or inconsistencies in the input images. This task typically involves techniques such as image preprocessing, feature extraction, and the application of machine learning models like neural networks, support vector machines, or, more recently, deep learning models such as convolutional neural networks (CNNs).

Handwritten digit recognition is important because it serves as a foundational problem in the broader field of pattern recognition and computer vision. Solving this problem effectively demonstrates the potential of machines to interpret and process human-generated data, paving the way for more complex tasks like automatic document processing, check verification in banks, and digitizing handwritten forms. Beyond its practical applications, it is also a widely used educational tool and benchmark to evaluate and compare machine learning algorithms, helping researchers refine techniques that are later adapted for more advanced problems in image classification, natural language processing, and artificial intelligence.

The MNIST dataset (Modified National Institute of Standards and Technology dataset) is a large collection of handwritten digits that is widely used as a benchmark for evaluating machine learning models, especially in the field of image classification. It consists of 70,000 grayscale images of handwritten digits — 60,000 images in the training set and 10,000 in the test set — with each image being a 28$\times$28 pixel square. Each image is labeled with the correct digit it represents (0 through 9), making it a supervised learning dataset. The dataset was created by modifying and combining samples from the original NIST datasets, which contained handwriting from American Census Bureau employees and high school students, to create a more standardized and accessible resource for machine learning research.

MNIST gained popularity because it is relatively small and simple, making it an ideal starting point for testing and validating new image recognition algorithms. Despite its simplicity, it captures enough variability in handwriting to present a meaningful challenge, allowing researchers and practitioners to experiment with preprocessing methods, feature extraction, and different classification algorithms. Over time, MNIST has become a standard benchmark for assessing the performance of models like logistic regression, support vector machines, and convolutional neural networks, and it has played a significant role in advancing developments in computer vision and deep learning.

In this notebook, we address the task of handwritten digit recognition using basic Artificial Neural Networks (ANNs). We will see that even a small ANN will yield good results, and we will discuss at the end why this task does not necessarily require more sophisticated neural network architectures such as Convolutional Neural Networks (CNNs).

Setting up the Notebook¶

Make Required Imports¶

This notebook requires the import of different Python packages as well as additional Python modules that are part of the repository. If a package is missing, use your preferred package manager (e.g., conda or pip) to install it. If the code cell below runs without any errors, all required packages and modules have been successfully imported.

In [1]:
from src.utils.libimports.mnistann import *
from src.utils.data.files import *
from src.utils.compute.gpu import *
from src.utils.plotting.nn import *

Download MNIST Dataset¶

You can download the MNIST dataset as a '.zip' archive file by running the code cell below. If this code cell throws an error, please check in the configuration file config.yaml whether the URL for downloading datasets is up to date and matches the one on GitHub. If not, simply download or pull the latest version from GitHub.

In [2]:
mnist_zip, mnist_folder = download_dataset("images/classification/mnist/mnist.zip")

print(f"mnist.zip downloaded into folder {mnist_folder}")
100%|█████████████████████████████████████| 11.6M/11.6M [00:00<00:00, 18.6MiB/s]
mnist.zip downloaded into folder data/datasets/images/classification/mnist/

Running the code cell below extracts the .zip archive into the four dataset files — two for the training data and two for the test data. You can set the variable mnist_folder to place the extracted files into a different folder; by default, the files are placed into the same folder as the .zip file.

In [3]:
#mnist_folder = "/path/to/file/location/"

decompress_file(mnist_zip, target_path=mnist_folder)

Checking & Setting Computing Device¶

PyTorch allows you to train neural networks on a supported GPU to significantly speed up the training process. If you have a supported GPU, feel free to utilize it. However, for this notebook it's certainly not needed, as our dataset is small and our network model is very simple. We provide an auxiliary method to automatically select the best device. It checks whether a supported GPU is available and, if so, uses it as the preferred device.

In [4]:
# Select preferred device (GPU, if available; CPU otherwise); you can enforce the use of the CPU
device = select_device(force_cpu=False)

print("Available device: {}".format(device))
Available device: cuda:0

Load & Prepare Dataset¶

Before we can train a model for handwritten digit recognition, we first have to load the training and test data and convert it into a suitable representation to serve as input for the model. This includes loading the data from files into an internal representation in memory, and utilizing auxiliary classes provided by the PyTorch library to streamline the training and evaluation. In the following, we go through these two main steps in more detail.

Load Images & Labels¶

The MNIST dataset is stored in a special binary file format called IDX, which has a specific header structure. For the image files (like train-images.idx3-ubyte), the first 16 bytes form the header that describes the metadata of the dataset. Here’s a breakdown of those first 16 bytes:

| Byte Offset | Length (bytes) | Description |
|-------------|----------------|-------------|
| 0           | 4              | Magic number (0x00000803) — identifies the file type as a set of images |
| 4           | 4              | Number of images (e.g., 60,000 for the training set) |
| 8           | 4              | Number of rows (28) |
| 12          | 4              | Number of columns (28) |

In more detail:

  • Bytes 0-3: Magic number = 0x00000803 (2051 in decimal, which means this is an image file)
  • Bytes 4-7: Number of images (e.g., 60,000 or 10,000)
  • Bytes 8-11: Number of rows per image (28)
  • Bytes 12-15: Number of columns per image (28)

After these 16 bytes, the actual image pixel data starts — where each pixel is stored as an unsigned byte (0 to 255).
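
Purely as an illustration (not part of the loading code below), we can unpack these header fields explicitly with Python's struct module; the format string ">IIII" reads four big-endian unsigned 32-bit integers, which is how IDX stores them. This sketch assumes the dataset files have already been extracted into mnist_folder as shown above.

In [ ]:
import struct

# Sketch: inspect the 16-byte IDX image header (assumes the extracted files)
with open(mnist_folder + "train-images.idx3-ubyte", "rb") as f:
    # ">IIII" = four big-endian unsigned 32-bit integers
    magic, n_images, n_rows, n_cols = struct.unpack(">IIII", f.read(16))

print(magic, n_images, n_rows, n_cols)   # expected: 2051 60000 28 28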

The method load_mnist_images() below reads an IDX image file of the MNIST dataset. The frombuffer() method of the NumPy library interprets a buffer (like a byte string or bytearray) as a 1-dimensional NumPy array without copying the data. It allows you to quickly convert raw binary data into a NumPy array by specifying the desired data type (dtype). Since each pixel is stored as an unsigned byte (0 to 255), we can use uint8 as the data type. Also notice the offset=16, which conveniently allows us to skip the 16 header bytes.

The frombuffer() method returns a long 1d array containing the pixel values of all images. The method load_mnist_images() therefore reshapes this 1d array into a 2d array where the rows (i.e., the 1st dimension) reflect the number of images and the columns (i.e., the 2nd dimension) reflect the number of pixels per image (i.e., 28$\times$28=784).

In [5]:
def load_mnist_images(path):
    with open(path, "rb") as f:
        # First 16 bytes are magic number, n_images, n_rows, n_columns
        pixels = np.frombuffer(f.read(), dtype=np.uint8, offset=16)
    # Reshape array into (n_images, n_pixels_per_image) and return
    return pixels.reshape(-1, 784)

With this method we can now load the images of the training and the test dataset. Note that this dataset is not very large, so there is no problem holding all images in memory.

In [6]:
images_train = load_mnist_images(mnist_folder+"train-images.idx3-ubyte")
images_test  = load_mnist_images(mnist_folder+"t10k-images.idx3-ubyte")

print(f"Shape (n_images, n_pixels) of training data: {images_train.shape}")
print(f"Shape (n_images, n_pixels) of test data: {images_test.shape}")
Shape (n_images, n_pixels) of training data: (60000, 784)
Shape (n_images, n_pixels) of test data: (10000, 784)

For the training and the evaluation we also need the ground-truth labels, which are stored in two separate files. For the label files in the MNIST dataset (like train-labels.idx1-ubyte), the first 8 bytes form the header that describes the dataset metadata. Here’s the breakdown of those first 8 bytes:

| Byte Offset | Length (bytes) | Description |
|-------------|----------------|-------------|
| 0           | 4              | Magic number (0x00000801) — identifies the file type as labels |
| 4           | 4              | Number of labels (e.g., 60,000 for the training set) |

In more detail:

  • Bytes 0-3: Magic number = 0x00000801 (2049 in decimal — means this is a label file)
  • Bytes 4-7: Number of labels (e.g., 60000 or 10000)

After these 8 bytes, the actual label data starts — each label is stored as a single unsigned byte (representing the digit 0–9). Again, let's define a simple method load_mnist_labels() to read the label files and store all labels as a NumPy array.

In [7]:
def load_mnist_labels(path):
    with open(path, "rb") as f:
        # First 8 bytes are magic_number, n_images
        labels = np.frombuffer(f.read(), dtype=np.uint8, offset=8)
    return labels

We can now call the method load_mnist_labels() twice to load the labels of the training and the test dataset.

In [8]:
labels_train = load_mnist_labels(mnist_folder+"train-labels.idx1-ubyte")
labels_test  = load_mnist_labels(mnist_folder+"t10k-labels.idx1-ubyte")

print(f"Shape (n_images, n_pixels) of training labels: {labels_train.shape}")
print(f"Shape (n_images, n_pixels) of test labels: {labels_test.shape}")
Shape (n_images, n_pixels) of training labels: (60000,)
Shape (n_images, n_pixels) of test labels: (10000,)

Before we continue, let's have a quick look at some example images. The code cell below plots the first $k$ handwritten digits of the test dataset. For a better visualization, the images are plotted in a grid with n_row rows and n_col columns. Thus, for the default values n_row, n_col = 3, 5, the number of shown images is $k=15$. You can change the values of n_row and n_col to see more or fewer example images.

In [9]:
n_row, n_col = 3, 5

fig, axes = plt.subplots(n_row, n_col, figsize=(1.5*n_col,2*n_row))
for i in range(n_row*n_col):
    ax = axes[i//n_col, i%n_col]
    ax.imshow(images_test[i].reshape(28,28), cmap='gray_r')
    ax.set_title('Label: {}'.format(labels_test[i]))
    ax.tick_params(axis='both', which='both', bottom=False, top=False, labelbottom=False, right=False, left=False, labelleft=False)
plt.tight_layout()
plt.show()
[Figure: a 3×5 grid showing the first 15 test images, each titled with its ground-truth label]

A very important observation is that the MNIST dataset (and similar datasets containing hand-written digits) contains size-normalized and centered digits in fixed-size images — more specifically:

  • Size-normalized: Each handwritten digit has been scaled so that its shape roughly fits within a standard size, regardless of how large or small the original handwritten digit was. This means tall, narrow, short, or wide digits are all resized to be comparable in scale, which reduces variability caused by different handwriting styles.

  • Centered in fixed-size images: Every digit is placed in the middle of a 28$\times$28 pixel image. The image size is fixed — that is, all images have exactly the same dimensions. The digit itself is centered within the image so that it does not touch the edges and is consistently located, making it easier for machine learning models to learn relevant patterns without worrying about positional shifts.

This preprocessing step helps ensure that the variation in the dataset comes mainly from handwriting style rather than inconsistencies in size or position, which simplifies the task for machine learning models and is ultimately the reason why more sophisticated neural network architectures such as CNNs are not required for this task of handwritten digit recognition.
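
To make the "centered" property concrete, here is a small sanity-check sketch (not part of the original pipeline): we compute the intensity-weighted centroid of every test image and average the results. For a well-centered dataset, the mean centroid should lie close to the middle of the 28$\times$28 grid, i.e., near (13.5, 13.5).

In [ ]:
# Sketch: average intensity-weighted centroid of all test images
imgs = images_test.reshape(-1, 28, 28).astype(np.float64)
ys, xs = np.mgrid[0:28, 0:28]                 # row and column index grids
mass = imgs.sum(axis=(1, 2))                  # total ink per image
cy = (imgs * ys).sum(axis=(1, 2)) / mass      # row centroid per image
cx = (imgs * xs).sum(axis=(1, 2)) / mass      # column centroid per image
print(f"Mean centroid (row, col): ({cy.mean():.2f}, {cx.mean():.2f})")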

Generate Dataset & DataLoader¶

So far, we have both the images and labels stored as NumPy arrays. To train an Artificial Neural Network (ANN) using the PyTorch library, we first need to convert the NumPy arrays into PyTorch tensors. Using the Tensor and LongTensor classes of PyTorch, this is very easy to do, as shown in the code cell below.

Notice that we use the Tensor class for the images. Although the pixel values are integers ranging from 0 to 255, our model expects input values of type float; the Tensor class automatically converts the integer values to float values. In contrast, the labels are indeed treated as integers, hence the use of the LongTensor class.

In [10]:
X_train = torch.Tensor(images_train.copy())
y_train = torch.LongTensor(labels_train.copy())

X_test = torch.Tensor(images_test.copy())
y_test = torch.LongTensor(labels_test.copy())

For convenience, it is also a best practice to create an implementation of the PyTorch Dataset class. The TensorDataset class in the PyTorch library is a simple dataset wrapper that allows combining multiple tensors into a single dataset object. Each sample in the dataset is a tuple where the n-th element comes from the n-th tensor provided. Typically, this is used to pair input data (like images or features) with corresponding labels, making it easy to iterate over batches of data and labels together during training or evaluation. All tensors passed to TensorDataset must have the same first dimension (i.e., the same number of samples).

TensorDataset is particularly useful when you already have your data loaded as PyTorch tensors — which we have — and want a quick way to feed them into a DataLoader for batching, shuffling, and parallel loading. It provides a minimal and clean interface, especially for small to medium-sized datasets or prototyping, without needing to define a full custom dataset class. So let's create a TensorDataset for both the training and test data.

In [11]:
dataset_train = TensorDataset(X_train, y_train)
dataset_test  = TensorDataset(X_test,  y_test)
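
Indexing a TensorDataset returns a (data, label) tuple, which is exactly what the DataLoader will later group into mini-batches. A quick illustrative check:

In [ ]:
# Each sample is a (pixels, label) tuple
x0, y0 = dataset_train[0]
print(x0.shape, y0)   # torch.Size([784]) and the label of the first training image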

The last step is to create DataLoader instances for the training and test dataset. The DataLoader class in PyTorch is a utility that provides an efficient way to load data in batches from a dataset (such as a TensorDataset or a custom dataset). It handles tasks like batching the data, shuffling the dataset, and loading data in parallel using multiple worker processes. By wrapping a dataset in a DataLoader, you can easily iterate over mini-batches of your data during training or evaluation, which is essential for efficiently training models using stochastic gradient descent and other optimization algorithms.

The main benefits of DataLoader include automatic batching (to process multiple samples at once), shuffling (to help models generalize better), and parallel data loading (to speed up data access using multiple CPU cores). It also supports customizable sampling strategies and can handle variable-length data through custom collate functions. Overall, DataLoader streamlines the data management process, making it much easier and faster to feed data into your models, especially when working with large datasets.

In [12]:
loader_train = DataLoader(dataset_train, batch_size=64, shuffle=True)
loader_test  = DataLoader(dataset_test,  batch_size=64)
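
To see what the loader actually yields, we can pull a single mini-batch and inspect its shapes (an illustrative check; with batch_size=64 we expect 64 flattened images and 64 labels):

In [ ]:
# Pull one mini-batch from the training loader and inspect its shapes
X_batch, y_batch = next(iter(loader_train))
print(X_batch.shape, y_batch.shape)   # torch.Size([64, 784]) torch.Size([64])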

Notice that we shuffle the training dataset so that the data samples are shuffled before forming a mini-batch for the next training epoch. Shuffling the training data into different mini-batches between epochs is beneficial because it helps prevent the model from learning spurious patterns related to the order of the data rather than the actual underlying relationships in the dataset. If data is always fed to the model in the same sequence, the model might overfit to that specific order, which can reduce its ability to generalize to unseen data. Shuffling ensures that each mini-batch is varied from epoch to epoch, encouraging the model to be robust to the ordering and distribution of samples.

Another important benefit is that shuffling helps break up potential correlations between consecutive samples that might bias the gradient updates during training. For example, if similar samples are grouped together in the dataset (like many examples of the same digit in a row in MNIST), feeding them consecutively could cause redundant or skewed updates to the model's parameters. By shuffling, each mini-batch becomes a more representative mix of the overall dataset, leading to more stable and effective gradient estimates, which often results in faster convergence and better generalization.

Overall, shuffling increases randomness and diversity in the batches the model sees across epochs, which can improve both training efficiency and model performance on test data. That is why, in practice, shuffle=True is often set in PyTorch's DataLoader during training, but not typically used during validation or testing (where consistent evaluation is more important).


Create & Train Model¶

For creating and training the ANN model, we use the open-source machine learning framework PyTorch. PyTorch is convenient and easy to use for creating and training neural network models in Python because it uses a dynamic computation graph, meaning the graph is built on the fly as operations are executed, which aligns naturally with Python's imperative programming style and makes model development highly intuitive and flexible. Its syntax is clean and closely mirrors NumPy, reducing the learning curve for Python users, while the built-in autograd system simplifies gradient computation for backpropagation. Additionally, PyTorch offers seamless GPU acceleration via CUDA, making both prototyping and large-scale training straightforward and efficient.

Create ANN Model Architecture¶

The class MnistANN in the code cell below implements a simple Artificial Neural Network (ANN) using PyTorch. The network consists of one hidden layer (a second one is included in the code but commented out) and one output layer. The hidden layer uses a Rectified Linear Unit (ReLU) as the activation function. The output layer is followed by a LogSoftmax function, which first calculates the Softmax and then applies the logarithm in a single function; in short:

$$\large \mathrm{LogSoftmax}(x) = \log{\left( \mathrm{Softmax}(x) \right)} $$

The advantage of using LogSoftmax instead of Softmax is numerical stability and computational efficiency when combined with loss functions like Negative Log-Likelihood Loss (NLLLoss) — see below — because it avoids very small probability values that can cause underflow. By working directly with log-probabilities, LogSoftmax simplifies the math in many cases (like log-likelihood computations) and reduces the risk of floating-point precision issues, making training more stable and gradients easier to compute accurately.
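
A small numeric sketch illustrates the stability argument: for logits with large magnitudes, the naive composition log(Softmax(x)) can underflow to log(0) = -inf, while the fused log_softmax stays finite.

In [ ]:
# Illustrative comparison of naive log(softmax(x)) vs. fused log_softmax(x)
x = torch.tensor([[1000.0, 0.0, -1000.0]])
print(torch.log(torch.softmax(x, dim=1)))   # tensor([[0., -inf, -inf]])
print(torch.log_softmax(x, dim=1))          # tensor([[0., -1000., -2000.]])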

Your turn: Modify class MnistANN to make the network simpler or more complex (e.g., by removing or adding hidden layers) or by replacing the ReLU activation function with other nonlinearities.

In [13]:
class MnistANN(nn.Module):

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(28*28, 128),
            nn.ReLU(),
            #nn.Linear(128, 64),
            #nn.ReLU(),
            nn.Linear(128, 10),
            nn.LogSoftmax(dim=1)
        )

    def forward(self, inputs):
        return self.model(inputs)

We can now create an instance of our model and move it to the preferred device (e.g., a GPU if available).

In [14]:
model = MnistANN().to(device)

print(model)
MnistANN(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
    (3): LogSoftmax(dim=1)
  )
)

With the model in place, we can now use the MNIST dataset to train the model to recognize handwritten digits.

Model Training¶

Definition of Auxiliary Methods¶

For training the model, the code cell below defines the auxiliary method train_epoch() that implements the basic training loop for a single epoch — the training of a model with respect to all training samples using mini-batches. In a nutshell, the method iterates through all data samples in a DataLoader instance and performs the following steps:

  • Extract the next mini-batch of training samples from the data loader
  • Move the mini-batch (all images and labels) to the same device running the model
  • Pass the mini-batch to the model as input to get log probabilities as the model's output
  • Calculate the mini-batch loss with respect to the ground-truth labels and the given loss function (see below)
  • Perform backpropagation and update the model parameters (done by PyTorch under the hood)

During each iteration, we accumulate the mini-batch losses, and at the end we return the overall loss for the epoch.

In [15]:
def train_epoch(model, loader, optimizer, criterion):
    # Initialize epoch loss (cumulative loss over all batches)
    epoch_loss = 0.0
    # Use tqdm to get a nice progress bar
    with tqdm(total=len(loader)) as pbar:
        # Iterate over all batches
        for X_batch, y_batch in loader:
            # Move the batch to the correct device
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            # Get model outputs as log probabilities
            log_probs = model(X_batch)
            # Calculate loss
            loss = criterion(log_probs, y_batch)
            ### Pytorch magic! ###
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Keep track of overall epoch loss
            epoch_loss += loss.item()
            # Update progress bar
            pbar.update(1)
    # Return total loss across whole dataset
    return epoch_loss    

Although not required for the training itself, we want to monitor how the prediction performance of our model changes over time, i.e., after each epoch. To this end, the code cell below implements the auxiliary method evaluate() that iterates over all samples in a data loader, calculates the predicted labels using the given model, and returns the F1 score based on the predicted and ground-truth labels.

In [16]:
def evaluate(model, loader):
    # Define 2 lists holding all true labels and all predicted labels
    y_true, y_pred = [], []
    # Use tqdm to get a nice progress bar
    with tqdm(total=len(loader)) as pbar:
        # Iterate over all batches
        for X_batch, y_batch in loader:
            # Move the batch to the correct device
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            # Get model outputs as log probabilities
            log_probs = model(X_batch)
            # Calculate predicted class labels based on largest log probability values
            y_batch_pred = torch.argmax(log_probs, dim=1)
            # Add the true and predicted labels for the batch to the final lists
            y_true += list(y_batch.cpu())
            y_pred += list(y_batch_pred.cpu())
            # Update progress bar
            pbar.update(1)
    # Return f1 score
    return f1_score(y_true, y_pred, average='macro')

Let's evaluate our currently untrained model on the test data by calling the method evaluate() with the data loader holding the test data.

In [17]:
f1 = evaluate(model, loader_test)

print(f"F1 score: {f1:.3f}")
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 417.15it/s]
F1 score: 0.041

While the exact result will differ depending on the random initialization of the model parameters, the F1 score of the untrained model should roughly be around $0.1$. This is because our untrained model is more or less just guessing, and given that we have 10 possible classes (0, 1, 2, ..., 9), the model will be correct about 10% of the time.

Definition of Loss Function and Optimizer¶

To train the model using backpropagation and parameter updates, we need to define two more components:

  • Loss function (or criterion): The loss function measures how far the neural network's predictions are from the actual correct answers (i.e., the ground-truth labels). A loss function is sometimes called a criterion — and this term is commonly used in PyTorch — because it serves as the "standard" or "rule" by which the model’s performance is judged. Since our model returns log probabilities — recall that our model uses the LogSoftmax instead of the Softmax function — the suitable criterion is the Negative Log-Likelihood Loss (NLLLoss), which of course is provided by PyTorch.
  • Optimizer: After calculating the gradients for all model parameters with respect to the loss, the optimizer is responsible for the actual update of the model parameters based on the calculated gradients. There are different strategies and hence different optimizers available. In the code cell below, we use the Adam optimizer. The Adam optimizer is a method for updating a neural network's weights that combines different ideas to make learning faster and more stable; it adjusts the learning rate for each weight automatically by keeping track of both the average of past gradients and how much those gradients have varied. This makes Adam work better than standard Gradient Descent because it can handle noisy data, works well with sparse features, and needs less fine-tuning of the learning rate, helping the model learn faster and more reliably.
In [18]:
# Define loss function
criterion = nn.NLLLoss()
# Define optimizer for the update step
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
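
As an illustrative side note (not required for training): applying NLLLoss to log probabilities is equivalent to applying CrossEntropyLoss to the raw logits, since CrossEntropyLoss internally fuses LogSoftmax and NLLLoss. A quick check on random values:

In [ ]:
# Illustrative check: LogSoftmax + NLLLoss equals CrossEntropyLoss on raw logits
logits = torch.randn(4, 10)              # fake batch: 4 samples, 10 classes
targets = torch.tensor([3, 1, 0, 7])
print(nn.NLLLoss()(torch.log_softmax(logits, dim=1), targets))
print(nn.CrossEntropyLoss()(logits, targets))   # same value (up to float precision)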

Training Loop¶

Using our two auxiliary methods train_epoch() and evaluate(), training the model for several epochs (here: 10) — and evaluating the model after each epoch — becomes very simple, as shown in the code cell below. The only additional things to mention are the two calls model.train() and model.eval(). In PyTorch, the train() method sets the model to training mode, which turns on behaviors like dropout and batch normalization updates that help the model learn better during training, while the eval() method sets the model to evaluation mode, which turns off those behaviors so that the model makes stable, consistent predictions when testing or using it in real life; switching between them ensures the model behaves correctly in both training and testing situations.
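
As a quick illustrative check, the current mode of a model is exposed through its training attribute, which the two methods simply toggle:

In [ ]:
# model.train() / model.eval() toggle the module's training flag
model.train()
print(model.training)   # True
model.eval()
print(model.training)   # False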

Although our model does not contain any such components (e.g., dropout or batch normalization), it is good practice to switch between training and evaluation mode appropriately. With this, we could change our model, for example, by including one or more dropout layers without having to change the training loop in the code cell below. So run the code cell to train the model for num_epochs epochs. You can run the code cell multiple times to continue the training if needed.

In [19]:
num_epochs = 10
results = []

for epoch in range(1, num_epochs+1):
    # Set model into training mode
    model.train()
    # Train model for a single epoch
    epoch_loss = train_epoch(model, loader_train, optimizer, criterion)
    # Set model into evaluation mode
    model.eval()
    # Calculate training and test f1 scores
    f1_train = evaluate(model, loader_train)
    f1_test = evaluate(model, loader_test)
    # Append the epoch loss, training f1 score, and test f1 score to final result list (for plotting later)
    results.append((epoch_loss, f1_train, f1_test))
    # Print progress(epoch loss, training f1 score, and test f1 score)
    print(f"[Epoch {epoch}] loss:\t{epoch_loss:.3f}, f1 train: {f1_train:.3f}, f1 test: {f1_test:.3f}")  
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 314.84it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 509.61it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 591.41it/s]
[Epoch 1] loss:	1436.254, f1 train: 0.919, f1 test: 0.911
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 324.43it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 523.10it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 589.31it/s]
[Epoch 2] loss:	325.271, f1 train: 0.946, f1 test: 0.932
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 326.46it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 476.34it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 590.36it/s]
[Epoch 3] loss:	192.427, f1 train: 0.961, f1 test: 0.945
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 324.78it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 493.98it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 589.79it/s]
[Epoch 4] loss:	133.369, f1 train: 0.972, f1 test: 0.953
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 324.80it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 524.41it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 592.39it/s]
[Epoch 5] loss:	94.572, f1 train: 0.977, f1 test: 0.953
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 326.00it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 521.07it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 589.23it/s]
[Epoch 6] loss:	72.514, f1 train: 0.983, f1 test: 0.957
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 324.34it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 519.89it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 589.04it/s]
[Epoch 7] loss:	57.083, f1 train: 0.986, f1 test: 0.958
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 324.45it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 522.55it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 588.84it/s]
[Epoch 8] loss:	47.362, f1 train: 0.990, f1 test: 0.964
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 324.55it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 495.74it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 591.82it/s]
[Epoch 9] loss:	33.170, f1 train: 0.992, f1 test: 0.963
100%|████████████████████████████████████████| 938/938 [00:02<00:00, 325.00it/s]
100%|████████████████████████████████████████| 938/938 [00:01<00:00, 473.09it/s]
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 366.78it/s]
[Epoch 10] loss:	28.218, f1 train: 0.993, f1 test: 0.965

Even when starting from an untrained model, you should see that the model already achieves an F1 score of about $0.9$ after the first epoch, and then only improves slightly over time. This is because the task is a relatively easy one even for a simple ANN, as we will discuss at the end. Since we keep track of the loss as well as the F1 scores on the training and the test data, we can also plot the results using the auxiliary method plot_training_results() we provide.

In [20]:
plot_training_results(results, legend=['Loss (normalized)', 'F1 (train)', 'F1 (test)'])
[Figure: normalized training loss and train/test F1 scores plotted over the 10 training epochs]

Making Predictions¶

Of course, in practice, we want to use our trained model to make predictions for a given input image or batch of input images. Let's implement a simple method predict() that passes an input batch to the model, calculates the log probabilities for each image in the batch, and returns the predicted class label for each image.

In [21]:
def predict(model, batch):
    # Pass batch to model to get log probabilities
    log_probs = model(batch)
    # Return predictions derived from the largest log probabilities
    return torch.argmax(log_probs, dim=1)

To show an example, let's use the method to predict the class labels of the first k images in our test dataset — that is, the same k images we plotted at the beginning of the notebook (see above); hence the use of n_col and n_row as previously defined.

In [22]:
# Create batch from the first k images of the test set
batch_pred = predict(model, X_test[:n_col*n_row].to(device)).cpu()
# Organize the predicted class label to match the grid layout from above
batch_pred = batch_pred.reshape(n_row, n_col)
# Print predicted class labels
print(batch_pred)
tensor([[7, 2, 1, 0, 4],
        [1, 4, 9, 5, 9],
        [0, 6, 9, 0, 1]])

If you compare the output with the grid of image plots above, you will notice that they match — for example, the first entry is a $7$, matching the first image plot showing a $7$ in the grid. Since our model is not perfect, there is the possibility that one or two of the predictions do not match up due to misclassifications. However, since we got an F1 score of more than $0.95$, most or all of the predictions are likely to be correct.
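
Instead of comparing by eye, we can also check the predictions against the ground-truth labels programmatically (an illustrative check using the tensors defined above):

In [ ]:
# Compare predicted and true labels for the first k test images
matches = batch_pred.flatten() == y_test[:n_row*n_col]
print(f"{matches.sum().item()} of {matches.numel()} predictions correct")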


Error Analysis¶

Although our model seems to work reasonably well, it is of course not perfect, as it misclassified some images from the test dataset. Let's therefore have a closer look at which images of handwritten digits our model recognized incorrectly. Error analysis in machine learning is the process of carefully studying the mistakes a model makes — looking at the cases where it gets predictions wrong — to understand why those errors happen. By examining these incorrect predictions, you can find patterns, such as certain types of inputs the model struggles with or parts of the data that are confusing or mislabeled.

The goal of error analysis is to get insights that help improve the model, whether by fixing data problems, adjusting the model, or changing how you train it. It's a way to make smarter decisions about what to fix next, instead of just blindly trying random improvements. While further improvements are beyond the scope of this notebook, we can still look at some mistakes of our model to get some ideas of its weaknesses. For this, we simply iterate again through all the samples in our test dataset and keep track of the true and predicted class labels for each image.

In [23]:
y_true, y_pred = [], []

# Use tqdm to get a nice progress bar
with tqdm(total=len(loader_test)) as pbar:
    # Iterate over all batches
    for idx, (X_batch, y_batch) in enumerate(loader_test):
        # Move the batch to the correct device
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        # Get model outputs as log probabilities
        log_probs = model(X_batch)
        # Calculate predicted class labels based on largest log probability values
        y_batch_pred = torch.argmax(log_probs, dim=1)
        # Add the true and predicted labels for the batch to the final lists
        y_true += list(y_batch.cpu())
        y_pred += list(y_batch_pred.cpu())
        # Update progress bar
        pbar.update(1)
100%|████████████████████████████████████████| 157/157 [00:00<00:00, 538.11it/s]

Using the code below, we can now find all misclassifications by finding the indices in the arrays y_true and y_pred where the values in both arrays differ. The equal() method in NumPy compares two arrays element by element and returns a new array of True or False values, showing where the elements are exactly the same. Since we are interested in the values that are not the same, we can flip the Boolean values in the array using ~. We then also cast the Boolean array into an integer array where all False values become $0$ and all True values become $1$. Lastly, the flatnonzero() method returns the indices of all the non-zero elements in an array. After all, we do not care about the predictions our model got right.
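
Before applying this chain to our data, here is a toy example (purely illustrative) of how the three steps fit together:

In [ ]:
# Toy example of the mismatch-index chain described above
a_true = [1, 2, 3, 4]
a_pred = [1, 0, 3, 9]
print(np.equal(a_true, a_pred))                  # [ True False  True False]
print((~np.equal(a_true, a_pred)).astype(int))   # [0 1 0 1]
print(np.flatnonzero((~np.equal(a_true, a_pred)).astype(int)))   # [1 3]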

In [24]:
# Find all misclassifications (and correct classifications)
mismatch = (~np.equal(y_true, y_pred)).astype(int)
# Extract the indices of all misclassifications
indices_misclassifications = np.flatnonzero(mismatch)
# Print the number of misclassifications
print(f"Number of misclassifications: {indices_misclassifications.shape}")
Number of misclassifications: (347,)

Again, the exact number of misclassifications will depend on the initialization of the model parameters, the choice of the optimizer (and its parameters), as well as the number of epochs the model has been trained. However, you are likely to see between 300 and 600 misclassifications, out of a total of 10,000 test samples.

Using the indices of all misclassifications, we can now create a subset of the test dataset that only contains the samples that yield a misclassification.

In [25]:
X_misclassifications = X_test[indices_misclassifications]
y_misclassifications = y_test[indices_misclassifications]

We can now use the same code from before to plot the first k images, however, now only from the dataset of misclassifications. The title above each image shows both the true label and the predicted label.

In [26]:
fig, axes = plt.subplots(n_row, n_col, figsize=(1.5*n_col,2*n_row))
for i in range(n_row*n_col):
    ax = axes[i//n_col, i%n_col]
    ax.imshow(X_misclassifications[i].reshape(28,28), cmap='gray_r')
    log_probs = model(X_misclassifications[i].reshape(1,-1).to(device))
    label_pred = torch.argmax(log_probs, dim=1).cpu().item()
    label_true = y_misclassifications[i].cpu().item()
    ax.set_title(f"{label_true} vs. {label_pred}")
plt.tight_layout()
plt.show()
[Figure: a 3×5 grid of misclassified test images, each titled with its true vs. predicted label]

When looking at the examples above, you are likely to see cases where it is rather easy to see why the model has made a wrong prediction. For example, a "sloppy" $2$ (with only a small bottom part) can be mistaken for a $7$. On the other hand, you are also likely to see some cases where the digit is properly written but the model still recognizes it as a different digit. These cases highlight why understanding how a trained model works is both important and challenging. For example, if two very similar-looking images yield different predictions, it may mean that the model is rather "brittle" — just small changes to the input can quickly alter the predicted output. But again, a deeper discussion is beyond the scope of this notebook.


Discussion¶

In this notebook we addressed the task of handwritten digit recognition using a simple artificial neural network (ANN), and saw that the model performed arguably quite well. One might therefore wonder why ANNs are generally not used for similar image classification tasks. The answer to this question lies in the simple nature of the dataset in terms of the variety of the training samples.

It is important that MNIST images are size-normalized and centered when using simple neural networks (fully connected networks, i.e., ANNs) because these networks do not naturally understand the spatial structure of images. A fully connected neural network treats the image as just a long list of numbers (e.g., 784 pixel values), without knowing which pixels are near each other or where features like edges and curves are located. If the digit is off-center or varies too much in size, the same number might produce very different patterns of pixel values, making it harder for the network to learn consistent patterns.

  • Size-normalizing the digits makes sure that all the handwritten numbers are roughly the same scale. This means, for example, that a small "5" does not get confused with noise or looks completely different from a large "5". When the size is consistent, the neural network sees more similar pixel patterns for the same digit across many examples, which helps it learn more effectively.

  • Centering the digits means aligning them so they sit roughly in the middle of the image, which reduces variation in where the important features (like loops or lines) appear within the pixel grid. If the same digit appears in different corners of the image, a simple neural network (which has no idea about "position") will struggle to recognize them as the same digit. Centering helps ensure that, no matter who wrote the digit, it shows up in the same general spot, again making it easier for the model to learn.

In contrast, Convolutional Neural Networks (CNNs) are much better at handling shifts, scale changes, and spatial patterns because they process images in small patches and can recognize patterns no matter where they appear. But in simple ANNs, we need to help the model by making the images as uniform as possible — hence the importance of size-normalization and centering.
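
We can observe this sensitivity directly with a small experiment (a sketch assuming the trained model and the arrays from above are still in scope): shift a test digit a few pixels to the side with np.roll and compare the model's predictions for the original and the shifted version. While a human reads both images identically, the ANN's prediction may well change.

In [ ]:
# Sketch: shift a test digit 4 pixels to the right and compare predictions
img = images_test[0].reshape(28, 28)
img_shifted = np.roll(img, shift=4, axis=1)    # translate along the columns
batch = torch.Tensor(np.stack([img.reshape(784), img_shifted.reshape(784)]))
print(predict(model, batch.to(device)).cpu())  # the two predictions may differ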


Summary¶

Recognizing handwritten digits is a classic task in machine learning where the goal is to build a model that can correctly identify the number (0 to 9) written in an image. A popular dataset for this task is MNIST, which contains thousands of grayscale images of handwritten digits, each 28$\times$28 pixels in size. The job of the Artificial Neural Network (ANN) is to take these images as input and predict the correct digit label for each one. A basic artificial neural network for this task typically has an input layer with 784 neurons (since 28$\times$28 = 784 pixels), one or more hidden layers where the model learns patterns in the data, and an output layer with 10 neurons — one for each possible digit.

The network learns by adjusting the weights in these layers to reduce the difference between its predicted digit and the actual digit, using a process called backpropagation and an optimizer like Adam or Stochastic Gradient Descent (SGD). During training, the model is shown many examples of images and their correct labels so it can gradually improve its ability to recognize patterns like loops, edges, and shapes that make up different digits. Once trained, the model can be tested on new, unseen images to see how well it generalizes to new handwriting styles. This task is a great introduction to neural networks because it's simple yet powerful enough to show how models can learn to make accurate predictions.

In practice, even basic neural networks can achieve surprisingly good accuracy on handwritten digit recognition, but more advanced models like convolutional neural networks (CNNs) often do even better because they’re specially designed to work with image data. Still, using a basic ANN for this task is a great way to understand how neural networks work and how they can be applied to solve real-world problems.

In [ ]: