I originally planned to use AlexNet to classify CIFAR-10, but the images in this dataset are only 32*32, so following the AlexNet architecture exactly would shrink the feature maps down to almost nothing. I therefore started by slightly adjusting AlexNet's layer parameters and training it; even with GPU acceleration each epoch took nearly 5 minutes, and after 20 epochs the accuracy was still hovering around 60%.
So I changed the network structure. Since the images are so small, there are probably not that many local features to extract, and I suspected that a very deep convolutional stack would not help feature extraction much, so I cut the number of convolution kernels substantially and used only two convolution-pooling blocks.
The PyTorch code for this part is as follows:
self.feature_extraction = torch.nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False),  # (32-5+4)/1+1=32
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (32-3)/2+1=15
    nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False),  # (15-5+4)/1+1=15
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (15-3)/2+1=7
)
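The feature-map sizes in the comments come from the usual output-size formula for convolution and pooling, out = floor((in + 2*padding - kernel) / stride) + 1. As a quick sanity check (the helper below is not part of the original code), the two blocks take the maps from 32 down to 7:

# Hypothetical helper, not in the original post: trace the feature-map sizes
# annotated in the comments above.
def out_size(in_size, kernel, stride, padding):
    return (in_size + 2 * padding - kernel) // stride + 1

s = out_size(32, kernel=5, stride=1, padding=2)  # conv1: 32
s = out_size(s, kernel=3, stride=2, padding=0)   # pool1: 15
s = out_size(s, kernel=5, stride=1, padding=2)   # conv2: 15
s = out_size(s, kernel=3, stride=2, padding=0)   # pool2: 7
print(s)  # 7 -> the classifier below therefore takes 64 * 7 * 7 inputs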
After the two convolution-pooling blocks have extracted the features, fully connected layers perform the classification. I also adjusted these layer sizes to match the output size of the feature extractor above.
self.feature_classify = torch.nn.Sequential(
    nn.Dropout(p=0.25),
    nn.Linear(in_features=64 * 7 * 7, out_features=384),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.25),
    nn.Linear(in_features=384, out_features=192),
    nn.ReLU(inplace=True),
    nn.Linear(in_features=192, out_features=10),
)
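Most of the classifier's weights sit in the first fully connected layer (64*7*7 inputs by 384 outputs). A minimal sketch, assuming the layer sizes above (the 10-way output matches CIFAR-10's ten classes), that counts the parameters of the head:

# Hypothetical check, not in the original post: count classifier parameters.
# Dropout and ReLU carry no parameters, so only the Linear layers matter here.
import torch.nn as nn

head = nn.Sequential(
    nn.Linear(64 * 7 * 7, 384),
    nn.Linear(384, 192),
    nn.Linear(192, 10),
)
print(sum(p.numel() for p in head.parameters()))  # 1,280,458 in total,
# of which 64*7*7*384 + 384 = 1,204,608 belong to the first Linear layer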
The prediction results of this modified CNN are as follows:
GPU acceleration enabled
Files already downloaded and verified
Files already downloaded and verified
epoch:1,acc:0.2820,loss:768.9158
epoch:1,test_acc:0.3766,test_loss:135.6541
epoch:2,acc:0.4303,loss:620.1400
epoch:3,acc:0.4923,loss:555.9510
epoch:4,acc:0.5407,loss:509.0777
epoch:5,acc:0.5817,loss:466.2306
epoch:6,acc:0.6119,loss:435.5047
epoch:7,acc:0.6415,loss:406.7112
epoch:8,acc:0.6604,loss:383.8425
epoch:9,acc:0.6766,loss:365.9761
epoch:10,acc:0.6944,loss:347.8030
epoch:11,acc:0.7059,loss:335.6827
epoch:12,acc:0.7164,loss:323.1924
epoch:13,acc:0.7281,loss:310.9101
epoch:14,acc:0.7335,loss:304.4421
epoch:15,acc:0.7413,loss:294.5465
epoch:16,acc:0.7502,loss:286.1364
epoch:17,acc:0.7562,loss:279.5722
epoch:18,acc:0.7588,loss:274.7319
epoch:19,acc:0.7670,loss:265.4509
epoch:20,acc:0.7703,loss:261.4555
epoch:21,acc:0.7752,loss:255.8207
epoch:22,acc:0.7802,loss:250.5966
epoch:23,acc:0.7857,loss:245.0714
epoch:24,acc:0.7886,loss:240.5701
epoch:25,acc:0.7923,loss:235.4422

The code is as follows:

import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

EPOCH = 25

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU acceleration enabled")
else:
    device = torch.device("cpu")
    print("GPU is not available, using CPU")

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_set = datasets.CIFAR10('../pytorch_', train=True, download=True, transform=transform)
test_set = datasets.CIFAR10('../pytorch_', train=False, download=True, transform=transform)
train_data = DataLoader(train_set, batch_size=128, shuffle=True)  # x: torch.Size([128, 3, 32, 32]), y: torch.Size([128])
test_data = DataLoader(test_set, batch_size=128, shuffle=True)


class AlexNet(torch.nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.feature_extraction = torch.nn.Sequential(
            # First attempt (AlexNet-style stack), commented out after it proved too slow:
            # nn.Conv2d(in_channels=3, out_channels=96, kernel_size=3, stride=2, padding=2, bias=False),  # (32-3+4)/2+1=17
            # nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=3, stride=1),  # (17-3)/1+1=15
            # nn.Conv2d(in_channels=96, out_channels=192, kernel_size=5, padding=2),  # (15-5+4)/1+1=15
            # ...
            # nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1),  # (13-3+2)/1+1=13
            # nn.Conv2d(in_channels=384, out_channels=256, ...),
            # nn.Conv2d(in_channels=256, ...),  # (13-3)/2+1=6
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False),  # (32-5+4)/1+1=32
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (32-3)/2+1=15
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False),  # (15-5+4)/1+1=15
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (15-3)/2+1=7
        )
        self.feature_classify = torch.nn.Sequential(
            nn.Dropout(p=0.25),
            nn.Linear(in_features=64 * 7 * 7, out_features=384),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.25),
            nn.Linear(in_features=384, out_features=192),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=192, out_features=10),
        )

    def forward(self, x):
        x = self.feature_extraction(x)
        x = x.view(x.size(0), 64 * 7 * 7)
        x = self.feature_classify(x)
        return x


net = AlexNet()
net = net.to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=0.02)
loss_func = torch.nn.CrossEntropyLoss()


def train(model, epoch):
    model.train()
    loss_all = 0
    acc_all = 0
    for i, (data, target) in enumerate(train_data):
        data, target = data.to(device), target.to(device)
        out = net(data)
        # print(out.size())     # shape-debugging prints
        # print(target.size())
        loss = loss_func(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        _, pre = out.max(1)
        train_correct = (pre == target).sum().float()
        acc = train_correct / data.shape[0]
        loss_all += float(loss)
        acc_all += acc
    print('epoch:{},acc:{:.4f},loss:{:.4f}'.format(epoch + 1, acc_all / len(train_data), loss_all))


def test(model, epoch):
    model.eval()
    loss_all = 0
    acc_all = 0
    for i, (data, target) in enumerate(test_data):
        data, target = data.to(device), target.to(device)
        out = net(data)
        loss = loss_func(out, target)
        _, pre = out.max(1)
        test_correct = (pre == target).sum().float()
        acc = test_correct / data.shape[0]
        loss_all += float(loss)
        acc_all += acc
    print('epoch:{},test_acc:{:.4f},test_loss:{:.4f}'.format(epoch + 1, acc_all / len(test_data), loss_all))


def main():
    for i in range(EPOCH):
        train(net, i)
        test(net, i)


if __name__ == '__main__':
    main()


# Separate sanity-check snippet used while debugging the model and data shapes:
# if __name__ == '__main__':
#     net = AlexNet()
#     print(net)
#     input = torch.rand(128, 3, 32, 32)
#     out = net(input)
#     print(out)
#     print(out.shape)
#     for t, (x, y) in enumerate(train_data):
#         if t == 0:
#             print(y)
#             print(y.size())
#             print(x.size())

Original article: https://www.f2er.com/imageprocessing/994910.html
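One small note on the listing above: matplotlib.pyplot is imported but never used. A minimal sketch, assuming train and test are modified to return their average epoch accuracy as Python floats (e.g. float(acc_all / len(train_data))), of how the accuracy curves in the log could be plotted:

# Hypothetical extension, not in the original post: collect and plot
# per-epoch accuracies returned by train()/test().
import matplotlib.pyplot as plt

def main():
    train_acc, test_acc = [], []
    for i in range(EPOCH):
        train_acc.append(train(net, i))   # assumes train() now returns its average accuracy
        test_acc.append(test(net, i))     # assumes test() now returns its average accuracy
    epochs = range(1, EPOCH + 1)
    plt.plot(epochs, train_acc, label='train accuracy')
    plt.plot(epochs, test_acc, label='test accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()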