
[Deep Learning] Semi-Supervised and Unsupervised Learning: Autoencoders (with Code)


Table of Contents

Introduction to Autoencoders

Training an Autoencoder from Scratch

Evaluating the Trained Model

Visualizing the Results

Loading a Pre-Trained Model


Introduction to Autoencoders

An autoencoder has a simple structure consisting of an Encoder and a Decoder. The Encoder produces the latent variables, which in turn serve as the input to the Decoder.

The goal of an autoencoder is to find meaningful features that represent the input, such that the Decoder can reconstruct the original input from those features alone.
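The idea can be sketched in a few lines. The following is a toy fully connected example for illustration only; the actual convolutional model used in this post is defined later:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# Toy sketch: the smallest possible autoencoder, compressing
# 784-dimensional inputs down to 10 latent variables.
encoder = nn.Linear(784, 10)           # Encoder: input -> latent variables
decoder = nn.Linear(10, 784)           # Decoder: latent variables -> reconstruction

x = torch.rand(32, 784)                # a dummy batch of flattened 28x28 images
latent = encoder(x)                    # low-dimensional features representing the input
x_recon = torch.sigmoid(decoder(latent))
loss = F.mse_loss(x_recon, x)          # reconstruction error is the only training signal
print(loss.item())
```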

For a concrete dataset such as MNIST, the middle of the network consists of convolutional layers or fully connected layers.

Comparing PCA with autoencoders:

The autoencoder's reconstruction is closer to the real image, because its mapping can be nonlinear, whereas PCA is restricted to a linear projection.
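For reference, the PCA baseline looks like this. This is a minimal sketch assuming scikit-learn is available; it is not used anywhere else in this post, and the random array simply stands in for flattened MNIST images:

```python
# PCA baseline: a purely linear "encoder" and "decoder" with 10 components,
# the same size as latent_dims in the autoencoder below.
import numpy as np
from sklearn.decomposition import PCA

X = np.random.rand(1000, 784)          # stand-in for flattened MNIST images in [0, 1]
pca = PCA(n_components=10)
Z = pca.fit_transform(X)               # linear "encoder": project onto 10 components
X_recon = pca.inverse_transform(Z)     # linear "decoder": reconstruct from the components
print("PCA reconstruction MSE:", np.mean((X - X_recon) ** 2))
```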

Comparing autoencoders with classification networks:

An autoencoder needs no extra labels, so it belongs to unsupervised learning, whereas a classification network requires labels and is therefore supervised learning.
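The difference shows up directly in the training objective: the classifier's loss depends on the labels, while the autoencoder's loss compares the output with its own input. A schematic sketch with toy models (not the networks defined below):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

images = torch.rand(8, 1, 28, 28)        # a dummy batch of images
labels = torch.randint(0, 10, (8,))      # labels exist, but only the classifier uses them

# Supervised: the classification loss requires the labels.
classifier = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
cls_loss = F.cross_entropy(classifier(images), labels)

# Unsupervised: the reconstruction loss only needs the images themselves.
toy_autoencoder = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10),
                                nn.Linear(10, 28 * 28), nn.Unflatten(1, (1, 28, 28)))
ae_loss = F.mse_loss(toy_autoencoder(images), images)
```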

Training an Autoencoder from Scratch

The dataset is MNIST, and the code is based on the PyTorch framework in Python.

```python
import os

import torch
import torch.nn as nn
import torch.nn.functional as F

# Parameter Settings
latent_dims = 10
num_epochs = 50
batch_size = 64
capacity = 64
learning_rate = 1e-3
# use_gpu = True
use_gpu = False

# MNIST Data Loading
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

# MNIST images are single-channel, so Normalize takes one mean and one std
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = MNIST(root='./data/MNIST', download=True, train=True, transform=img_transform)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = MNIST(root='./data/MNIST', download=True, train=False, transform=img_transform)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)


# Autoencoder Definition
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        c = capacity
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=c, kernel_size=4, stride=2, padding=1)      # out: c x 14 x 14
        self.conv2 = nn.Conv2d(in_channels=c, out_channels=c * 2, kernel_size=4, stride=2, padding=1)  # out: c*2 x 7 x 7
        self.fc = nn.Linear(in_features=c * 2 * 7 * 7, out_features=latent_dims)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # flatten batch of multi-channel feature maps to a batch of feature vectors
        x = self.fc(x)
        return x


class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        c = capacity
        self.fc = nn.Linear(in_features=latent_dims, out_features=c * 2 * 7 * 7)
        self.conv2 = nn.ConvTranspose2d(in_channels=c * 2, out_channels=c, kernel_size=4, stride=2, padding=1)
        self.conv1 = nn.ConvTranspose2d(in_channels=c, out_channels=1, kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        x = self.fc(x)
        # unflatten batch of feature vectors to a batch of multi-channel feature maps
        x = x.view(x.size(0), capacity * 2, 7, 7)
        x = F.relu(self.conv2(x))
        # last layer uses tanh, since the images are normalized and 0-centered
        x = torch.tanh(self.conv1(x))
        return x


class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        latent = self.encoder(x)
        x_recon = self.decoder(latent)
        return x_recon


autoencoder = Autoencoder()
device = torch.device("cuda:0" if use_gpu and torch.cuda.is_available() else "cpu")
autoencoder = autoencoder.to(device)

num_params = sum(p.numel() for p in autoencoder.parameters() if p.requires_grad)
print('Number of parameters: %d' % num_params)

# Train Autoencoder
optimizer = torch.optim.Adam(params=autoencoder.parameters(), lr=learning_rate, weight_decay=1e-5)

# set to training mode
autoencoder.train()

train_loss_avg = []
print('Training...')
for epoch in range(num_epochs):
    train_loss_avg.append(0)
    num_batches = 0

    for img_batch, _ in train_dataloader:
        img_batch = img_batch.to(device)

        # autoencoder reconstruction
        img_batch_recon = autoencoder(img_batch)

        # reconstruction error
        loss = F.mse_loss(img_batch_recon, img_batch)

        # backpropagation
        optimizer.zero_grad()
        loss.backward()

        # one step of the optimizer (using the gradients from backpropagation)
        optimizer.step()

        train_loss_avg[-1] += loss.item()
        num_batches += 1

    train_loss_avg[-1] /= num_batches
    print("Epoch [%d / %d] average reconstruction error: %f" % (epoch + 1, num_epochs, train_loss_avg[-1]))
```
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-07-12/DSXfQkYnHCOox4NAqLj2TypR98ga.png)
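Optionally, the per-epoch averages collected in `train_loss_avg` above can be plotted to see how the reconstruction error decreases over training. This is a small addition assuming matplotlib is installed; it is not part of the original training code:

```python
# Optional: plot the average reconstruction error per epoch collected during training.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(train_loss_avg)
plt.xlabel('Epoch')
plt.ylabel('Average reconstruction error')
plt.title('Autoencoder training loss')
plt.show()
```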

Evaluating the Trained Model

```python
# Evaluate on the Test Set

# set to evaluation mode
autoencoder.eval()

test_loss_avg, num_batches = 0, 0
for img_batch, _ in test_dataloader:
    img_batch = img_batch.to(device)

    # autoencoder reconstruction
    img_batch_recon = autoencoder(img_batch)

    # reconstruction error
    loss = F.mse_loss(img_batch_recon, img_batch)

    test_loss_avg += loss.item()
    num_batches += 1

test_loss_avg /= num_batches
print('average reconstruction error: %f' % (test_loss_avg))
```
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-07-12/JpyveHBXQY7RsqF1lajAWfGCSOw3.png)

Visualizing the Results

```python
# Visualize Reconstructions

import numpy as np
import matplotlib.pyplot as plt

plt.ion()

import torchvision.utils

autoencoder.eval()


# undo the normalization: map images from [-1, 1] back to [0, 1]
def to_img(x):
    x = 0.5 * (x + 1)
    x = x.clamp(0, 1)
    return x


def show_image(img):
    img = to_img(img)
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))


def visualise_output(images, model):
    with torch.no_grad():
        images = images.to(device)
        images = model(images)
        images = images.cpu()
        images = to_img(images)
        np_imagegrid = torchvision.utils.make_grid(images[0:100], 10, 5).numpy()
        plt.imshow(np.transpose(np_imagegrid, (1, 2, 0)))
        plt.show()


images, labels = next(iter(test_dataloader))

print('Original images')
show_image(torchvision.utils.make_grid(images[0:100], 10, 5))
plt.show()

print('Autoencoder reconstruction')
visualise_output(images, autoencoder)
```
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-07-12/8eAEyrqBIcovNlwktnuhS4jfWbJ5.png)

Loading a Pre-Trained Model

If you do not want to train from scratch, a pre-trained model is available. Simply replace the training section above with the code below.

```python
# Alternatively: Load Pre-Trained Autoencoder

import urllib.request

if not os.path.isdir('./pretrained'):
    os.makedirs('./pretrained')
print("downloading...")
urllib.request.urlretrieve("http://geometry.cs.ucl.ac.uk/creativeai/pretrained/autoencoder.pth",
                           "./pretrained/autoencoder.pth")
autoencoder.load_state_dict(torch.load('./pretrained/autoencoder.pth', map_location='cpu'))
print('done')

# this is how the autoencoder parameters can be saved:
# torch.save(autoencoder.state_dict(), './pretrained/my_autoencoder.pth')
```
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-07-12/ADnbTulrmcH0EBd73NSCwjKqgGe8.png)
