import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import ExponentialLR
import matplotlib.pyplot as plt  # used by visualize_and_predict below
# Get CPU or GPU device for training
device = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device)
# Random seed for reproducibility
seed = 42
torch.manual_seed(seed)
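# For stricter run-to-run reproducibility on CUDA you can also force
# deterministic cuDNN kernels (optional, and may slow training):
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False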
# Save the model at the end?
save_model = False
# Batch sizes for training and testing
batch_size = 64
test_batch_size = 14
# Training epochs
n_epochs = 10
# Learning rate
learning_rate = 1.0
# Decay rate for adjusting the learning rate
gamma = 0.7
# How many batches before logging training status
log_interval = 10
# Number of target classes in the MNIST data
num_classes = 10
train_kwargs = {'batch_size': batch_size}
test_kwargs = {'batch_size': test_batch_size}
# CUDA settings
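# pin_memory=True speeds up host-to-GPU copies and num_workers=1 loads
# batches in a background worker; both settings only matter on CUDA.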
if torch.cuda.is_available():
cuda_kwargs = {'num_workers': 1,
'pin_memory': True,
'shuffle': True}
train_kwargs.update(cuda_kwargs)
test_kwargs.update(cuda_kwargs)
# The scaled mean and standard deviation of the MNIST dataset (precalculated)
data_mean = 0.1307
data_std = 0.3081
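# These statistics can be reproduced with a one-off pass over the raw
# training images, a minimal sketch (assumes the download path used below):
# raw = datasets.MNIST('../data', train=True, download=True,
#                      transform=transforms.ToTensor())
# imgs = torch.stack([img for img, _ in raw])
# print(imgs.mean().item(), imgs.std().item())  # ~0.1307, ~0.3081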
# Convert input images to tensors and normalize
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((data_mean,), (data_std,))
])
# Get the MNIST data from torchvision
dataset1 = datasets.MNIST('../data', train=True, download=True,
transform=transform)
dataset2 = datasets.MNIST('../data', train=False,
transform=transform)
# Define the data loaders that will handle fetching of data
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
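# Optional sanity check, a minimal sketch: pull one batch and confirm the
# expected shapes (64, 1, 28, 28) and (64,). Left commented out because
# drawing a batch here would advance the RNG state used for shuffling.
# sample_data, sample_target = next(iter(train_loader))
# print(sample_data.shape, sample_target.shape)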
# Define the architecture of the neural network
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding='valid')
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding='valid')
self.dropout1 = nn.Dropout(0.25)
self.dropout2 = nn.Dropout(0.5)
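        # After two 3x3 'valid' convolutions (28 -> 26 -> 24) and one 2x2 max
        # pool (24 -> 12), each image yields 64 * 12 * 12 = 9216 features.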
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, num_classes)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2)
x = self.dropout1(x)
x = torch.flatten(x, 1)
x = self.fc1(x)
x = F.relu(x)
x = self.dropout2(x)
x = self.fc2(x)
        # Return log-probabilities so nll_loss can be applied directly
        output = F.log_softmax(x, dim=1)
return output
def train(model, device, train_loader, optimizer, epoch, log_interval):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
        # forward() returns log-probabilities, so use negative log-likelihood
        # (F.cross_entropy would double-apply log_softmax here)
        loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if batch_idx % log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
# sum up batch loss
test_loss += F.nll_loss(output, target, reduction='sum').item()
# get the index of the max log-probability
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
# Send the model to the device (CPU or GPU)
model = Net().to(device)
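# Optional sanity check: report the number of trainable parameters.
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Trainable parameters: {n_params:,}')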
# Define the optimizer to use for gradient descent
optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
# Decay the learning rate by a factor of gamma after every epoch
scheduler = ExponentialLR(optimizer, gamma=gamma)
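# After k calls to scheduler.step() the learning rate is lr * gamma**k,
# e.g. 1.0 * 0.7**3 = 0.343 after three epochs here.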
# Train the model
for epoch in range(1, n_epochs + 1):
train(model, device, train_loader, optimizer, epoch, log_interval)
test(model, device, test_loader)
scheduler.step()
if save_model:
torch.save(model.state_dict(), "mnist_cnn_pytorch.ckpt")
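# To reload the saved weights later, a minimal sketch (assumes the same Net
# definition and checkpoint filename as above):
# model = Net().to(device)
# model.load_state_dict(torch.load("mnist_cnn_pytorch.ckpt"))
# model.eval()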
def visualize_and_predict(model, device, data_loader):
model.eval()
with torch.no_grad():
# Extract the first batch of images and labels
data, target = next(iter(data_loader))
# Select the first image and label
img, label = data[0], target[0]
# Visualize the image
plt.imshow(img.squeeze(), cmap='gray')
plt.title(f'Actual Label: {label.item()}')
plt.show()
# Run inference
img = img.to(device)
output = model(img.unsqueeze(0)) # Add batch dimension
pred = output.argmax(dim=1, keepdim=True)
print(f'Predicted Label: {pred.item()}')
visualize_and_predict(model, device, test_loader)