GAN
数据集
香港中文大学 CelebA 数据集:http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html
总共202599张面部图片。
制作HDF5压缩格式文件
分层数据格式(hierarchical data format)是一种成熟的,开源的压缩数据格式,专门用于存储非常大量的数据。
在《PyTorch 生成对抗网络编程》([英] 塔里克·拉希德 著)一书中,作者先将数据处理成 HDF5 格式再运行代码。本来想重构一下,然后发现如果用 ImageFolder 直接读取的话,基本要重构所有代码。还是制作 HDF5 吧!
import os
import zipfile

import h5py
import imageio

# Pack the CelebA JPEGs from the downloaded zip archive into a single
# gzip-compressed HDF5 file, one dataset per image, keyed
# 'img_align_celeba/<running index>.jpg'.
hdf5_file = './celeba_aligned_small.h5py'
total_images = 202599

with h5py.File(hdf5_file, 'w') as hf:  # mode 'w' creates the file if missing
    count = 0
    with zipfile.ZipFile('img_align_celeba.zip', 'r') as zf:
        # The archive holds one folder, 'img_align_celeba/', with 200k+ images.
        # namelist()[0] is the folder entry itself ('img_align_celeba/');
        # the following entries look like 'img_align_celeba/000001.jpg'.
        for name in zf.namelist():
            if not name.endswith('.jpg'):
                continue

            # extract() writes the member below the current directory and
            # returns the path (a string) of the extracted file.
            ofile = zf.extract(name)
            try:
                # imread returns the decoded image as an array.
                img = imageio.imread(ofile)
            finally:
                # The extracted copy is only a temporary; remove it even when
                # decoding fails so no stray files pile up on disk.
                os.remove(ofile)

            # compression selects the filter, compression_opts its level.
            # This creates the group 'img_align_celeba' on first use and
            # stores the image array inside it.
            hf.create_dataset('img_align_celeba/' + str(count) + '.jpg',
                              data=img, compression='gzip', compression_opts=9)
            count += 1
            if count % 1000 == 0:
                print('images done ...', count)
            if count == total_images:  # stop once the requested number is stored
                break
生成人脸
上篇博文使用的是MLP(全连接神经网络),这次生成人脸换成了CNN(卷积神经网络)。并采用GPU加速,个人认为代码写的不是很好,没有使用多线程,导致训练巨慢,1060训练一轮要4个小时左右,3090也要三个小时左右一轮。抽时间把代码重构一遍,从读取文件开始。
import h5py
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
import torch
import torch.nn as nn


def _to_cuda_if_available(tensor):
    """Return *tensor* on the GPU when CUDA is usable, otherwise unchanged."""
    return tensor.cuda() if torch.cuda.is_available() else tensor


def crop_centre(img, new_width, new_height):
    """Return the central ``new_height`` x ``new_width`` window of *img*.

    *img* is indexed as (height, width, channels); all channels are kept.
    """
    height, width, _ = img.shape
    startx = width // 2 - new_width // 2
    starty = height // 2 - new_height // 2
    return img[starty:starty + new_height, startx:startx + new_width, :]


def generate_random_image(size):
    """Return a float tensor of shape *size* with uniform values in [0, 1).

    The tensor lives on the GPU when CUDA is available; on CPU-only machines
    it stays on the CPU instead of raising.
    """
    return _to_cuda_if_available(torch.rand(size))


def generate_random_seed(size):
    """Return a tensor of shape *size* drawn from a standard normal."""
    return torch.randn(size)


class CelebADataset(Dataset):
    """CelebA images read from the group 'img_align_celeba' of an HDF5 file.

    Images are stored under the keys '<index>.jpg'.
    """

    def __init__(self, file):
        # The file handle stays open for the lifetime of the dataset object.
        self.file_object = h5py.File(file, 'r')
        self.dataset = self.file_object['img_align_celeba']

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        """Return image *index* as a (1, 3, 128, 128) float tensor in [0, 1].

        Raises:
            IndexError: if *index* is past the last image; this is also what
                ends a plain ``for`` loop over the dataset.
        """
        if index >= len(self.dataset):
            raise IndexError
        img = np.array(self.dataset[str(index) + '.jpg'])
        img = crop_centre(img, 128, 128)
        tensor = _to_cuda_if_available(torch.as_tensor(img, dtype=torch.float32))
        # (H, W, C) -> (C, H, W), plus a leading batch dimension of one.
        return tensor.permute(2, 0, 1).view(1, 3, 128, 128) / 255.0

    def plot_image(self, index):
        """Show the centre-cropped image *index* with matplotlib."""
        img = np.array(self.dataset[str(index) + '.jpg'])
        img = crop_centre(img, 128, 128)
        plt.imshow(img, interpolation='nearest')
        plt.show()


# Build the discriminator
class Discriminator(nn.Module):
    """CNN that scores one 3x128x128 image as real (1) or fake (0).

    The module owns its loss function and optimiser, so a training step is a
    single call to ``train(inputs, targets)``.
    """

    def __init__(self):
        # Initialise the parent class.
        super(Discriminator, self).__init__()
        # Three stride-2 8x8 convolutions shrink 128x128 -> 61 -> 27 -> 10.
        self.feature = nn.Sequential(
            nn.Conv2d(3, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.Conv2d(256, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.Conv2d(256, 3, kernel_size=8, stride=2),
            nn.GELU(),
        )
        self.classifier = nn.Sequential(
            nn.Linear(3 * 10 * 10, 1),
            nn.Sigmoid(),
        )
        self.loss_function = nn.BCELoss()
        # Create the optimiser.
        self.optimiser = torch.optim.Adam(self.parameters(), lr=0.01)
        self.counter = 0    # number of training steps taken so far
        self.progress = []  # loss value recorded every 10th step

    def forward(self, inputs):
        """Return the realness score, shape (1,), for a single-image batch."""
        x = self.feature(inputs)
        # Flattens everything, batch dimension included, so this only works
        # for a batch holding exactly one image.
        x = x.view(-1)
        x = self.classifier(x)
        return x

    def train(self, inputs=True, targets=None):
        """Run one optimisation step, or switch the training mode.

        ``train(inputs, targets)`` performs a forward pass, a backward pass
        and one optimiser update. This method shadows ``nn.Module.train``,
        which ``eval()`` calls as ``self.train(False)``; when *targets* is
        omitted the call is therefore forwarded to ``nn.Module.train`` with
        *inputs* as the mode flag, so ``eval()`` and ``train()`` keep working.
        """
        if targets is None:
            return super().train(inputs)

        outputs = self.forward(inputs)
        loss = self.loss_function(outputs, targets)

        # Bump the step counter and record the loss every 10 steps.
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        if self.counter % 10000 == 0:
            print("counter = ", self.counter)

        # Clear old gradients, backpropagate, update the weights.
        self.optimiser.zero_grad()
        loss.backward()
        self.optimiser.step()


# Build the generator
class Generator(nn.Module):
    """Maps a 100-value seed vector to one 3x128x128 image in [0, 1].

    The module owns its optimiser; the loss comes from the discriminator
    passed to ``train``.
    """

    def __init__(self):
        super(Generator, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(100, 3 * 11 * 11),
            nn.GELU(),
        )
        # Three stride-2 transposed convolutions grow 11x11 -> 28 -> 62 -> 128.
        self.feature = nn.Sequential(
            nn.ConvTranspose2d(3, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.ConvTranspose2d(256, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.ConvTranspose2d(256, 3, kernel_size=8, stride=2, padding=1),
            nn.BatchNorm2d(3),
            nn.Sigmoid(),
        )
        # Create the optimiser.
        self.optimiser = torch.optim.Adam(self.parameters(), lr=0.01)
        self.counter = 0    # number of training steps taken so far
        self.progress = []  # loss value recorded every 10th step

    def forward(self, x):
        """Return a (1, 3, 128, 128) image for a single 100-value seed."""
        x = self.linear(x)
        x = x.view(1, 3, 11, 11)
        x = self.feature(x)
        return x

    def train(self, D=True, inputs=None, targets=None):
        """Run one optimisation step, or switch the training mode.

        ``train(D, inputs, targets)`` generates an image from *inputs*, scores
        it with the discriminator *D*, and updates only the generator using
        ``D.loss_function`` against *targets*. This method shadows
        ``nn.Module.train``, which ``eval()`` calls as ``self.train(False)``;
        when *inputs* and *targets* are both omitted the call is forwarded to
        ``nn.Module.train`` with *D* as the mode flag.
        """
        if inputs is None and targets is None:
            return super().train(D)

        g_output = self.forward(inputs)  # image produced by the generator
        d_output = D.forward(g_output)   # discriminator's score for that image
        loss = D.loss_function(d_output, targets)

        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())

        # Only the generator's optimiser steps here.
        self.optimiser.zero_grad()
        loss.backward()
        self.optimiser.step()


if torch.cuda.is_available():
    # Make newly created float tensors CUDA tensors by default.
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
    print('using cuda:', torch.cuda.get_device_name(0))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# When CUDA is available, float tensors now default to CUDA tensors.
# x = generate_random_image(2)
# print(x.device)
celeba_dataset = CelebADataset('celeba_aligned_small.h5py')
# celeba_dataset.plot_image(66)  # show one image as a sanity check
D = Discriminator()
G = Generator()
G.to(device)
D.to(device)
epoches = 1

# The two labels never change, so build them once, on whichever device is
# in use, instead of allocating CUDA-only tensors on every iteration.
real_label = torch.tensor([1.0], device=device)
fake_label = torch.tensor([0.0], device=device)

for epoch in range(epoches):
    print('开始第', epoch+1, '轮', '*************'*4)
    for image_data in celeba_dataset:
        # Real sample, labelled 1: train the discriminator.
        D.train(image_data, real_label)
        # Generated sample, labelled 0: train the discriminator. The fake
        # must come from the generator (detached so that G is not updated
        # here); pure uniform noise would teach D nothing about G's output.
        D.train(G.forward(generate_random_seed(100)).detach(), fake_label)
        # Train the generator to make the discriminator answer 1.
        G.train(D, generate_random_seed(100), real_label)
# torch.save(G, 'face-cnn-1.pth')
生成图片
# Draw six generated faces in a 2 x 3 grid.
with torch.no_grad():  # inference only: no autograd graph is needed
    for i in range(6):
        output = G.forward(generate_random_seed(100))
        # (1, 3, 128, 128) -> (128, 128, 3), the layout imshow expects.
        img = output.detach().permute(0, 2, 3, 1).view(128, 128, 3).cpu().numpy()
        plt.subplot(2, 3, i + 1)
        plt.imshow(img)
plt.show()
运行一轮我这1060跑了4个小时,设备好的炼丹师们可以多运行几轮试试。
感觉他这代码写的不咋好,而且用HDF5文件格式有17G大的数据,原始数据还不到1.5G
给他重构了一下:
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
import torch
import torch.nn as nn
import torch.utils.data as Data
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Folder handed to ImageFolder, which expects the images inside
# sub-directories of it.
train_data_dir = r'face'
batchsize = 32
train_data_transforms = transforms.Compose([
    transforms.CenterCrop(128),  # keep the central 128 x 128 region
    transforms.ToTensor(),
])
train_data = ImageFolder(train_data_dir, transform=train_data_transforms)
train_data_loader = Data.DataLoader(
    train_data,
    batch_size=batchsize,
    shuffle=True,
    num_workers=32,
    # Drop the incomplete last batch so every batch has exactly `batchsize`
    # images, which the fixed-size label tensors in the training loop need.
    drop_last=True,
)
# Build the discriminator
class Discriminator(nn.Module):
    """CNN that scores each 3x128x128 image of a batch as real or fake."""

    def __init__(self):
        # Initialise the parent class.
        super(Discriminator, self).__init__()
        # Three stride-2 8x8 convolutions shrink 128x128 -> 61 -> 27 -> 10.
        self.feature = nn.Sequential(
            nn.Conv2d(3, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.Conv2d(256, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.Conv2d(256, 3, kernel_size=8, stride=2),
            nn.GELU(),
        )
        self.classifier = nn.Sequential(
            nn.Linear(3 * 10 * 10, 1),
            nn.Sigmoid(),
        )

    def forward(self, inputs):
        """Return one sigmoid score per image, shape (batch, 1)."""
        x = self.feature(inputs)
        x = x.view(x.size(0), -1)  # flatten each sample, keep the batch axis
        x = self.classifier(x)
        return x


# Build the generator
class Generator(nn.Module):
    """Maps a batch of 100-value seed vectors to 3x128x128 images in [0, 1]."""

    def __init__(self):
        super(Generator, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(100, 3 * 11 * 11),
            nn.GELU(),
        )
        # Three stride-2 transposed convolutions grow 11x11 -> 28 -> 62 -> 128.
        self.feature = nn.Sequential(
            nn.ConvTranspose2d(3, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.ConvTranspose2d(256, 256, kernel_size=8, stride=2),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.ConvTranspose2d(256, 3, kernel_size=8, stride=2, padding=1),
            nn.BatchNorm2d(3),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return images of shape (batch, 3, 128, 128) for seeds (batch, 100)."""
        x = self.linear(x)
        # Take the batch size from the input rather than the global
        # `batchsize`, so the model also accepts other batch sizes, e.g. a
        # single seed when generating one image.
        x = x.view(-1, 3, 11, 11)
        x = self.feature(x)
        return x


D = Discriminator()
G = Generator()
d_optimizer = torch.optim.SGD(D.parameters(), lr=0.01)
g_optimizer = torch.optim.SGD(G.parameters(), lr=0.01)
loss_func = nn.BCELoss()

# One device for models and data: batches used to be moved with an
# unconditional .cuda(), which crashed on CPU-only machines even though the
# models themselves were only moved when CUDA was available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if torch.cuda.is_available():
    print('using cuda:', torch.cuda.get_device_name(0))
    D = D.cuda()
    G = G.cuda()

if __name__ == '__main__':
    d_z_loss = []  # mean discriminator loss of every finished epoch
    epoches = 25

    # Every batch has exactly `batchsize` samples, so the label tensors are
    # loop-invariant and built once.
    real_label = torch.ones(batchsize).to(device)
    fake_label = torch.zeros(batchsize).to(device)

    for epoch in range(epoches):
        print('开始第', epoch + 1, '轮', '*******************' * 3)
        sum_d_loss = 0
        for step, (b_x, _) in enumerate(train_data_loader):  # b_x: (32, 3, 128, 128)
            # --- Discriminator: real data, label 1 ---
            real_out = D(b_x.to(device)).squeeze()  # (batch, 1) -> (batch,)
            d_loss_real = loss_func(real_out, real_label)

            # --- Discriminator: fake data, label 0 ---
            # detach() keeps this loss from propagating into the generator.
            fake_img = G(torch.rand(batchsize, 100).to(device)).detach()
            fake_out = D(fake_img).squeeze()
            d_loss_fake = loss_func(fake_out, fake_label)

            # Update the discriminator.
            d_loss = d_loss_real + d_loss_fake
            sum_d_loss += d_loss.item()
            d_optimizer.zero_grad()  # clear gradients before backpropagation
            d_loss.backward()        # backpropagate the error
            d_optimizer.step()       # update the parameters

            # --- Generator: two updates per discriminator update ---
            for _ in range(2):
                fake_img = G(torch.rand(batchsize, 100).to(device))
                output = D(fake_img).squeeze()
                # The generator wants its fakes to be scored as real.
                g_loss = loss_func(output, real_label)
                g_optimizer.zero_grad()
                g_loss.backward()
                g_optimizer.step()

            print('{:.5%}'.format(step / len(train_data_loader)))

        d_z_loss.append(sum_d_loss / len(train_data_loader))
        print(d_z_loss)

        if epoch % 2 == 0:
            name = 'face'+str(epoch)+'.pth'
            torch.save(G, name, _use_new_zipfile_serialization=False)
            # NOTE(review): this writes the *discriminator* to a file called
            # 'Generator.pth' - looks like a naming slip; confirm before
            # renaming, since other scripts may load this path.
            torch.save(D,'Generator.pth', _use_new_zipfile_serialization=False)
发布评论