【Pytorch】Transposed Convolution

Contents
1 Convolution
2 Deconvolution / Transposed Convolution
3 MaxUnpool / ConvTranspose
4 encoder-decoder
5 Visualization

References:

详解逆卷积操作 – Up-sampling with Transposed Convolution (a detailed explanation of deconvolution)

PyTorch使用记录 (PyTorch usage notes)

https://github.com/naokishibuya/deep-learning/blob/master/python/transposed_convolution.ipynb

1 Convolution

(Figure: the input matrix and the 3x3 convolution kernel)

With stride 1, the kernel slides over the input as follows (figure: the sliding-window view of the convolution).

Each output value depends on 9 values of the input (one 3x3 window).

More intuitively, this can be written as the unrolled matrix multiplication y = Wx: flatten the input into a vector x, and lay the kernel weights out in a sparse matrix W, one row per output position.

The pattern of zeros in W is determined by the stride and the kernel size.
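To make this concrete, here is a minimal sketch (sizes assumed for illustration: a 4x4 input and a 3x3 kernel, matching the 9-values claim above) that builds the unrolled 4x16 matrix W explicitly and checks it against F.conv2d:

import torch
import torch.nn.functional as F

x = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)  # 4x4 input
k = torch.arange(1., 10.).reshape(1, 1, 3, 3)                  # 3x3 kernel

y = F.conv2d(x, k)  # stride 1, no padding -> torch.Size([1, 1, 2, 2])

# Row i of W holds the kernel weights at the i-th sliding position
# (window top-left corners (0,0), (0,1), (1,0), (1,1)), zeros elsewhere.
W = torch.zeros(4, 16)
for i, (r, c) in enumerate([(0, 0), (0, 1), (1, 0), (1, 1)]):
    for kr in range(3):
        for kc in range(3):
            W[i, (r + kr) * 4 + (c + kc)] = k[0, 0, kr, kc]

print(torch.allclose(y.reshape(4), W @ x.reshape(16)))  # True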

2 Deconvolution / Transposed Convolution

Compared with the name "deconvolution" (Deconvolution), "transposed convolution" (Transposed Convolution) is the more appropriate term.

Reversing the above process: a single input value now contributes to 9 output values.

This is implemented simply by transposing the original W; of course, the transposed W is still learned and updated during training.

From the matrix multiplication it is clear that each input value influences 9 output values.
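Under the same assumed sizes, a minimal sketch of the reverse direction: multiplying a flattened 2x2 input by the transposed matrix W^T yields exactly what F.conv_transpose2d computes, upsampling 2x2 back to 4x4:

import torch
import torch.nn.functional as F

k = torch.arange(1., 10.).reshape(1, 1, 3, 3)  # same 3x3 kernel as above
y = torch.tensor([[[[1., 2.], [3., 4.]]]])     # 2x2 input

up = F.conv_transpose2d(y, k)  # stride 1 -> torch.Size([1, 1, 4, 4])

# The same 4x16 matrix W as in the previous sketch.
W = torch.zeros(4, 16)
for i, (r, c) in enumerate([(0, 0), (0, 1), (1, 0), (1, 1)]):
    for kr in range(3):
        for kc in range(3):
            W[i, (r + kr) * 4 + (c + kc)] = k[0, 0, kr, kc]

print(torch.allclose(up.reshape(16), W.t() @ y.reshape(4)))  # True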

3 MaxUnpool / ConvTranspose

Let's write a bit of code to see the effect.

"maxpool"m = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, return_indices=True)input_data = torch.tensor([[[[1, 2, 8, 7],[3, 4, 6, 5],[9, 10, 16, 15],[13, 14, 12, 11]]]], dtype=torch.float32)print(input_data.shape) # torch.Size([1, 1, 4, 4])out, indices = m(input_data)print(out, "\n", indices)

output

tensor([[[[ 4.,  8.],
          [14., 16.]]]])
tensor([[[[ 5,  2],
          [13, 10]]]])

The indices are the flattened positions of each maximum within the 4x4 input (e.g. 5 is row 1, col 1, where the 4 sits); MaxUnpool2d uses them to scatter values back to those positions.

"maxuppooling"n = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=0)out = n(out, indices, output_size=input_data.size())print(out)

output

tensor([[[[ 0.,  0.,  8.,  0.],
          [ 0.,  4.,  0.,  0.],
          [ 0.,  0., 16.,  0.],
          [ 0., 14.,  0.,  0.]]]])

Take special care when using MaxUnpool: you must save the indices during max pooling (return_indices=True), otherwise an error is raised. When the pooled size is ambiguous, also pass output_size, as sketched below.
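A minimal sketch of the output_size ambiguity (the 5x5 input is an assumed example, not from the original post): when pooling floors the size, the unpooled size is ambiguous, and output_size resolves it.

import torch
import torch.nn as nn

# 2x2 / stride-2 pooling floors a 5x5 input down to 2x2, so unpooling could
# yield 4x4 or 5x5; output_size pins it to the original 5x5.
x = torch.randn(1, 1, 5, 5)
pool = nn.MaxPool2d(2, stride=2, return_indices=True)
unpool = nn.MaxUnpool2d(2, stride=2)
y, idx = pool(x)                            # torch.Size([1, 1, 2, 2])
out = unpool(y, idx, output_size=x.size())  # torch.Size([1, 1, 5, 5])
print(out.shape)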

Next, a simple application of these layers inside a network.

import torch
import torch.nn as nn

# MaxUnpool
class ConvDAE(nn.Module):
    def __init__(self):
        super().__init__()
        # input: batch x 3 x 32 x 32 -> output: batch x 16 x 16 x 16
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=1, padding=1),  # batch x 16 x 32 x 32
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(2, stride=2, return_indices=True),
        )
        self.unpool = nn.MaxUnpool2d(2, stride=2, padding=0)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.ConvTranspose2d(16, 3, 3, stride=1, padding=1, output_padding=0),
            nn.ReLU(),
        )

    def forward(self, x):
        out, indices = self.encoder(x)   # torch.Size([1, 16, 16, 16])
        out = self.unpool(out, indices)  # torch.Size([1, 16, 32, 32])
        out = self.decoder(out)          # torch.Size([1, 3, 64, 64])
        return out

if __name__ == "__main__":
    DAE = ConvDAE()
    x = torch.randn((1, 3, 32, 32))
    DAE(x)

The structure is simple. The encoder halves the spatial resolution (3 x 32 x 32 -> 16 x 16 x 16); note that the MaxPool2d with return_indices=True is the last module of the Sequential, so encoder(x) returns the (output, indices) pair directly.

The unpool stage reverses the max pooling, restoring the resolution (16 x 16 x 16 -> 16 x 32 x 32).

The decoder then uses transposed convolutions to raise the resolution further (16 x 32 x 32 -> 3 x 64 x 64), so the network actually outputs at twice the input resolution; a worked size check follows below.
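A quick worked check of that first decoder layer, using the transposed-convolution size formula L_out = (L_in - 1) * stride - 2 * padding + kernel_size + output_padding, which for L_in = 32 gives (32 - 1) * 2 - 2 + 3 + 1 = 64:

import torch
import torch.nn as nn

# The first decoder layer of ConvDAE above: stride 2 doubles the resolution.
t = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1, output_padding=1)
print(t(torch.randn(1, 16, 32, 32)).shape)  # torch.Size([1, 16, 64, 64])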

4 encoder-decoder

Now a slightly more complex encoder-decoder structure.

import torch
import torch.nn as nn

class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        # -------
        # encode
        # -------
        self.encode1 = nn.Sequential(
            # layer 1
            nn.Conv1d(kernel_size=25, in_channels=1, out_channels=32, stride=1, padding=12),  # (1,784)->(32,784)
            nn.BatchNorm1d(32),  # with BN
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3, padding=1, return_indices=True),  # (32,784)->(32,262)
        )
        self.encode2 = nn.Sequential(
            # layer 2
            nn.Conv1d(kernel_size=25, in_channels=32, out_channels=64, stride=1, padding=12),  # (32,262)->(64,262)
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3, padding=1, return_indices=True),  # (batchsize,64,262)->(batchsize,64,88)
        )
        self.encode3 = nn.Sequential(
            nn.Linear(in_features=88 * 64, out_features=1024),
            nn.Linear(in_features=1024, out_features=30),
        )
        # -------
        # decode
        # -------
        self.unpooling1 = nn.MaxUnpool1d(kernel_size=3, stride=3, padding=1)  # (batchsize,64,262)<-(batchsize,64,88)
        self.unpooling2 = nn.MaxUnpool1d(kernel_size=3, stride=3, padding=1)  # (32,784)<-(32,262)
        self.decode1 = nn.Sequential(
            # layer 1
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.ConvTranspose1d(kernel_size=25, in_channels=64, out_channels=32, stride=1, padding=12),  # (32,262)<-(64,262)
        )
        # layer 2
        self.decode2 = nn.Sequential(
            nn.ReLU(),
            nn.BatchNorm1d(32),  # with BN
            nn.ConvTranspose1d(kernel_size=25, in_channels=32, out_channels=1, stride=1, padding=12),  # (1,784)<-(32,784)
        )
        self.decode3 = nn.Sequential(
            nn.Linear(in_features=30, out_features=1024),
            nn.Linear(in_features=1024, out_features=88 * 64),
        )

    def forward(self, x):
        # encode
        x = x.view(x.size(0), 1, -1)      # flatten the image: torch.Size([1, 1, 784])
        x, indices1 = self.encode1(x)     # conv block: torch.Size([1, 32, 262])
        x, indices2 = self.encode2(x)     # conv block: torch.Size([1, 64, 88])
        x = x.view(x.size(0), -1)         # flatten: torch.Size([1, 5632])
        x = self.encode3(x)               # fully connected: torch.Size([1, 30])
        # decode
        x = self.decode3(x)               # torch.Size([1, 5632])
        x = x.view(x.size(0), 64, 88)     # torch.Size([1, 64, 88])
        x = self.unpooling1(x, indices2)  # torch.Size([1, 64, 262])
        x = self.decode1(x)               # torch.Size([1, 32, 262])
        x = self.unpooling2(x, indices1)  # torch.Size([1, 32, 784])
        x = self.decode2(x)               # torch.Size([1, 1, 784])
        return x

if __name__ == "__main__":
    x = torch.randn((1, 1, 28, 28))
    model = autoencoder()
    model(x)

(Figure: a rough sketch of the encoder-decoder structure)

The main point is the use of nn.ConvTranspose1d together with nn.MaxUnpool1d; again, nn.MaxUnpool1d needs the indices saved during pooling.

The application here is 1d; the 2d case extends in exactly the same way (a quick 1d length check follows below).
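As a sanity check of the 1d lengths in the comments above (784 -> 262 on the encode path, and back), a minimal sketch:

import torch
import torch.nn as nn

pool = nn.MaxPool1d(kernel_size=3, stride=3, padding=1, return_indices=True)
unpool = nn.MaxUnpool1d(kernel_size=3, stride=3, padding=1)
x = torch.randn(1, 32, 784)
y, idx = pool(x)             # floor((784 + 2*1 - 3) / 3) + 1 = 262
print(y.shape)               # torch.Size([1, 32, 262])
print(unpool(y, idx).shape)  # (262 - 1) * 3 - 2*1 + 3 = 784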

5 Visualization

A simple experiment: feed in raw MNIST images, downsample with conv + max pooling, map back to the original size with max unpooling + transposed conv, and see how it looks.

Load the libraries and the dataset.

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./', train=True,
                                           transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./', train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, shuffle=False)

Set up a helper for image visualization.

def imshow(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

Build the network and initialize it.

# Build the network
class CNNMNIST(nn.Module):
    def __init__(self):
        super(CNNMNIST, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, return_indices=True)
        self.unpool1 = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=0)
        self.unconv1 = nn.ConvTranspose2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        # encode
        out1 = self.conv1(x)             # torch.Size([100, 1, 26, 26])
        out, indices = self.pool1(out1)  # torch.Size([100, 1, 13, 13])
        # decode (output_size pins the unpooled size to conv1's output size)
        out = self.unpool1(out, indices, output_size=out1.size())  # torch.Size([100, 1, 26, 26])
        out = self.unconv1(out)          # torch.Size([100, 1, 28, 28])
        return out

# Initialize the network
model = CNNMNIST().to(device)
print(model)

output

CNNMNIST(
  (conv1): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (unpool1): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))
  (unconv1): ConvTranspose2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
)

Train and save the network.

# Define the optimizer and the loss function
criterion = nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training
model.train()
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        # Forward pass (the reconstruction target is the input itself)
        outputs = model(images)
        loss = criterion(outputs, images)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            # Report the loss
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

# Save the model
torch.save(model, "model.pkl")

output

Epoch [1/5], Step [100/600], Loss: 0.0764
Epoch [1/5], Step [200/600], Loss: 0.0549
Epoch [1/5], Step [300/600], Loss: 0.0457
Epoch [1/5], Step [400/600], Loss: 0.0468
Epoch [1/5], Step [500/600], Loss: 0.0443
Epoch [1/5], Step [600/600], Loss: 0.0452
Epoch [2/5], Step [100/600], Loss: 0.0445
Epoch [2/5], Step [200/600], Loss: 0.0427
Epoch [2/5], Step [300/600], Loss: 0.0407
Epoch [2/5], Step [400/600], Loss: 0.0432
Epoch [2/5], Step [500/600], Loss: 0.0414
Epoch [2/5], Step [600/600], Loss: 0.0413
Epoch [3/5], Step [100/600], Loss: 0.0415
Epoch [3/5], Step [200/600], Loss: 0.0420
Epoch [3/5], Step [300/600], Loss: 0.0425
Epoch [3/5], Step [400/600], Loss: 0.0413
Epoch [3/5], Step [500/600], Loss: 0.0416
Epoch [3/5], Step [600/600], Loss: 0.0414
Epoch [4/5], Step [100/600], Loss: 0.0401
Epoch [4/5], Step [200/600], Loss: 0.0409
Epoch [4/5], Step [300/600], Loss: 0.0418
Epoch [4/5], Step [400/600], Loss: 0.0412
Epoch [4/5], Step [500/600], Loss: 0.0407
Epoch [4/5], Step [600/600], Loss: 0.0405
Epoch [5/5], Step [100/600], Loss: 0.0411
Epoch [5/5], Step [200/600], Loss: 0.0412
Epoch [5/5], Step [300/600], Loss: 0.0406
Epoch [5/5], Step [400/600], Loss: 0.0407
Epoch [5/5], Step [500/600], Loss: 0.0409
Epoch [5/5], Step [600/600], Loss: 0.0401

Load the model and visualize the results.

"load model"model = torch.load("model.pkl")"visual"dataiter = iter(train_loader)images, lables = dataiter.next()imshow(torchvision.utils.make_grid(images, nrow=10))plt.show()images = images.to(device)# Forward passoutputs = model(images)imshow(torchvision.utils.make_grid(outputs.cpu().squeeze(0), nrow=10))plt.show()

This multi-image MNIST visualization is a handy pattern to reuse; the core call is torchvision.utils.make_grid, which tiles a batch of images into one grid image (here 10 per row). A small shape sketch follows.
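A tiny sketch of what make_grid returns (the random batch is just for illustration): 100 single-channel 28x28 images tiled 10 per row become one 3-channel image, with 2 pixels of default padding around each tile.

import torch
import torchvision

# Single-channel inputs are repeated to 3 channels; 10 * (28 + 2) + 2 = 302.
grid = torchvision.utils.make_grid(torch.rand(100, 1, 28, 28), nrow=10)
print(grid.shape)  # torch.Size([3, 302, 302])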

(Figures: sample inputs and the corresponding outputs)

Replacing pooling/unpooling with pure (strided) convolutions gives less distortion; a quantitative way to compare the two variants is sketched after the code below.

class CNNMNIST(nn.Module):
    def __init__(self):
        super(CNNMNIST, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=2, stride=2, padding=0)
        self.unconv1 = nn.ConvTranspose2d(in_channels=1, out_channels=1, kernel_size=2, stride=2, padding=0)
        self.unconv2 = nn.ConvTranspose2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        # encode
        out = self.conv1(x)      # torch.Size([100, 1, 26, 26])
        out = self.conv2(out)    # torch.Size([100, 1, 13, 13])
        # decode
        out = self.unconv1(out)  # torch.Size([100, 1, 26, 26])
        out = self.unconv2(out)  # torch.Size([100, 1, 28, 28])
        return out

(Figures: inputs and the corresponding reconstructions)
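To actually quantify "less distortion", here is a minimal evaluation sketch (not in the original post; it reuses model, test_loader, device and nn from the code above): train each variant, then compare the mean per-pixel reconstruction MSE on the test set.

model.eval()
with torch.no_grad():
    total, n = 0.0, 0
    for images, _ in test_loader:
        images = images.to(device)
        total += nn.functional.mse_loss(model(images), images, reduction='sum').item()
        n += images.numel()
print('test MSE per pixel: {:.6f}'.format(total / n))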
