携手创造,共同成长!这是我参与「日新方案 8 月更文应战」的第8天,点击检查活动概况
前言
挑选一个有代表性的网络结构,然后从头到尾实现一遍,这样才会对这个网络结构有比较深的印象,对同类的其他网络结构也会有感觉。带着这种想法,今天就自己动手实现一个完整的 YOLOv4 网络。
这里是在 GitHub 上挑选了一个已经实现的 PyTorch 版本项目,下载下来作为参考,主要是先模仿,然后再去优化,形成自己的东西。
import torch
import torch.nn as nn
import torch.nn.functional as F
Conv 基类
这个网络基于卷积,通常我们提到的卷积块会包含一个卷积层、BN 层和激活层。根据激活层的不同,模型中的卷积块分为 CBM 和 CBL,它们分别采用的激活函数是 Mish 和 LeakyReLU。
class Conv(nn.Module):
    """Base convolution block: a 2-D convolution, optionally followed by batch norm.

    The layers are kept in ``self.conv`` (an ``nn.ModuleList``) and applied in
    order by ``forward``, so a subclass can append an activation after calling
    this constructor.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Channel count produced by the convolution.
        kernel_size: Side length of the square kernel.
        stride: Stride of the convolution.
        bn: When true, a ``BatchNorm2d`` layer follows the convolution.
        bias: When true, the convolution has a learnable bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, bn=True, bias=False):
        super().__init__()
        # (k - 1) // 2 keeps the spatial size unchanged for odd kernels at stride 1.
        padding = (kernel_size - 1) // 2
        layers = [
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=bool(bias),
            )
        ]
        if bn:
            layers.append(nn.BatchNorm2d(out_channels))
        self.conv = nn.ModuleList(layers)

    def forward(self, x):
        """Apply every layer stored in ``self.conv`` in order and return the result."""
        out = x
        for layer in self.conv:
            out = layer(out)
        return out
$$pad = \left\lfloor \frac{kernel\_size - 1}{2} \right\rfloor$$
完成 CBM
Mish 激活函数
class Mish(nn.Module):
    """Mish activation, computed element-wise as ``x * tanh(softplus(x))``.

    The module has no parameters or state, so the default ``nn.Module``
    constructor is used as is.
    """

    def forward(self, x):
        """Return the Mish activation of ``x`` (same shape as ``x``)."""
        gate = torch.tanh(F.softplus(x))
        return x * gate
class CBM(Conv):
    """Convolution block ending in a Mish activation (Conv, optional BN, Mish).

    Takes the same arguments as ``Conv``; the activation is appended to the
    layer list that ``Conv`` builds, so the inherited ``forward`` applies it last.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, bn=True, bias=False):
        super(CBM, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            bn=bn,
            bias=bias,
        )
        activation = Mish()
        self.conv.append(activation)
CBL
在 CBL 块中采用 LeakyReLU 激活函数
class CBL(Conv):
    """Convolution block ending in LeakyReLU (Conv, optional BN, LeakyReLU).

    Takes the same arguments as ``Conv``; the activation is appended to the
    layer list that ``Conv`` builds, so the inherited ``forward`` applies it last.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, bn=True, bias=False):
        super(CBL, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            bn=bn,
            bias=bias,
        )
        # Negative slope 0.1, applied in place on the preceding layer's output.
        activation = nn.LeakyReLU(0.1, inplace=True)
        self.conv.append(activation)
ResBlock
class ResBlock(nn.Module):
    """Stack of residual units, each a 1x1 ``CBM`` followed by a 3x3 ``CBM``.

    Args:
        ch: Channel count; every convolution in the block keeps it unchanged.
        nblocks: Number of residual units applied one after another.
        shortcut: When true, each unit's output is added to its input;
            otherwise the unit's output simply replaces its input.
    """

    def __init__(self, ch: int, nblocks: int = 1, shortcut: bool = True) -> None:
        super().__init__()
        self.shortcut = shortcut
        # One inner ModuleList per residual unit, so parameters are registered
        # under module_list.<unit>.<layer>.
        self.module_list = nn.ModuleList(
            [
                nn.ModuleList([CBM(ch, ch, 1, 1), CBM(ch, ch, 3, 1)])
                for _ in range(nblocks)
            ]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through every residual unit and return the result."""
        # Fix: the units are stored on the instance; the bare name
        # ``module_list`` used previously raised NameError.
        for unit in self.module_list:
            h = x
            for layer in unit:
                h = layer(h)
            x = x + h if self.shortcut else h
        return x
class Upsample(nn.Module):
    """Upsample a 4-D tensor to the spatial size given by ``target_size``.

    Only ``target_size[2]`` and ``target_size[3]`` are read; they are used as
    the output height and width.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, target_size, inference=False):
        """Return ``x`` resized to ``(target_size[2], target_size[3])``.

        Args:
            x: 4-D input tensor.
            target_size: Indexable size whose entries 2 and 3 give the output
                height and width (for example the ``size()`` of another tensor).
            inference: When false, ``F.interpolate`` with ``mode='nearest'`` is
                used. When true, each element is repeated through
                ``view``/``expand``; the repeat factor is the integer quotient
                of the target and input sizes.
        """
        assert x.data.dim() == 4
        out_h, out_w = target_size[2], target_size[3]

        if not inference:
            return F.interpolate(x, size=(out_h, out_w), mode='nearest')

        batch, channels = x.size(0), x.size(1)
        in_h, in_w = x.size(2), x.size(3)
        # Add a singleton axis after H and after W, then broadcast along them.
        with_axes = x.view(batch, channels, in_h, 1, in_w, 1)
        repeated = with_axes.expand(
            batch, channels, in_h, out_h // in_h, in_w, out_w // in_w
        )
        # Merge each repeat axis back into its spatial axis.
        return repeated.contiguous().view(batch, channels, out_h, out_w)
当在推理过程中,是将特征图通过 view 和 expand 把每个像素按整数倍复制,扩展到目标尺寸;非推理(训练)时则直接使用最近邻插值 F.interpolate。
# Demo: upsample a 13x13 feature map to the spatial size of a 26x26 one.
backbone_output = torch.randn(2,256,13,13)
# Bare expression: it has no effect when this runs as a script.
backbone_output.shape
backbone_output_2 = torch.randn(2,256,26,26)
upsample = Upsample()
# `inference` is left at its default (False), so F.interpolate is used.
upsample_output = upsample(backbone_output,backbone_output_2.size())
print(upsample_output.shape) # expected: torch.Size([2, 256, 26, 26])
由于 CSP1 比较特别,所以拿出来单独实现,其他的 CSP 模块则共用下面的 CSP 类。
class CSP1(nn.Module):
    """First CSP stage of the backbone: stem convolution, stride-2 down-sampling
    and a single residual unit wrapped in a cross-stage-partial split.

    Shape comments assume a 416x416 RGB input. The ``[route]`` and
    ``[shortcut]`` notes mirror the darknet cfg layer references.
    """

    def __init__(self) -> None:
        # Fix: nn.Module must be initialised before sub-modules are assigned.
        super().__init__()
        # 416 x 416 x 3 -> 416 x 416 x 32
        self.conv1 = CBM(3, 32, 3, 1)
        # 416 x 416 x 32 -> 208 x 208 x 64 (stride-2 down-sampling)
        self.conv2 = CBM(32, 64, 3, 2)
        # Bypass branch: 208 x 208 x 64 -> 208 x 208 x 64
        self.conv3 = CBM(64, 64, 1, 1)
        # [route] layers = -2: the main branch also starts from conv2's output.
        # 208 x 208 x 64 -> 208 x 208 x 64
        self.conv4 = CBM(64, 64, 1, 1)
        # Residual unit: 1x1 bottleneck (64 -> 32), then 3x3 back to 64.
        self.conv5 = CBM(64, 32, 1, 1)
        # Fix: 3x3 kernel (was 1x1), matching the 1x1 + 3x3 residual unit used
        # by ResBlock in this file and the reference darknet cfg.
        self.conv6 = CBM(32, 64, 3, 1)
        # [shortcut] from = -3, activation = linear (done in forward)
        # Transition convolution of the main branch.
        self.conv7 = CBM(64, 64, 1, 1)
        # [route] layers = -1, -7: both branches concatenated -> 128 channels.
        self.conv8 = CBM(128, 64, 1, 1)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Return the 64-channel feature map at half the input resolution."""
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        # Bypass branch.
        x3 = self.conv3(x2)
        # route -2: go back two layers, i.e. feed conv2's output to the branch
        # that passes through the residual unit.
        x4 = self.conv4(x2)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        # shortcut -3: residual addition with the unit's input.
        x6 = x6 + x4
        # Transition.
        x7 = self.conv7(x6)
        # route -1, -7. Fix: concatenate along the channel axis (dim=1); the
        # default dim=0 stacked along the batch axis and left 64 channels for
        # conv8, which expects 128.
        x7 = torch.cat([x7, x3], dim=1)
        x8 = self.conv8(x7)
        return x8
class CSP(nn.Module):
    """Generic CSP down-sampling stage.

    A stride-2 convolution doubles the channel count and halves the resolution.
    Two 1x1 convolutions then split the result into a bypass branch and a
    residual branch, which are concatenated and fused by a final 1x1 convolution.

    Args:
        in_channels: Channel count of the input; the output has
            ``2 * in_channels`` channels.
        nblocks: Number of residual units in the residual branch. Defaults to
            2, the value that was previously hard-coded.
    """

    def __init__(self, in_channels: int, nblocks: int = 2) -> None:
        # Fix: nn.Module must be initialised before sub-modules are assigned.
        super().__init__()
        out_channels = in_channels * 2
        # Stride-2 down-sampling, channels doubled.
        self.conv1 = CBM(in_channels, out_channels, 3, 2)
        # [route] -2: two 1x1 convolutions split the channels into two branches.
        self.conv2 = CBM(out_channels, in_channels, 1, 1)
        self.conv3 = CBM(out_channels, in_channels, 1, 1)
        self.resblock = ResBlock(ch=in_channels, nblocks=nblocks)
        # Transition convolution after the residual units.
        self.conv4 = CBM(in_channels, in_channels, 1, 1)
        # Fuses the concatenated branches.
        self.conv5 = CBM(out_channels, out_channels, 1, 1)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Return the fused feature map (``2 * in_channels`` channels)."""
        x1 = self.conv1(input)
        # Bypass branch.
        x2 = self.conv2(x1)
        # Residual branch.
        x3 = self.conv3(x1)
        r = self.resblock(x3)
        x4 = self.conv4(r)
        # Concatenate both branches along the channel axis.
        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5
Neck 模块完成
class Neck(nn.Module):
    """Neck of the detector: SPP on the deepest feature map followed by a
    top-down path that upsamples and merges it with two shallower maps.

    Args:
        inference: Forwarded to ``Upsample`` to select its inference code path.
    """

    def __init__(self, inference: bool = False) -> None:
        # Fix: nn.Module must be initialised before sub-modules are assigned.
        super().__init__()
        # Whether the module runs in inference mode.
        self.inference = inference
        # ConvSet1
        self.conv1 = CBL(1024, 512, 1, 1)
        self.conv2 = CBL(512, 1024, 3, 1)
        self.conv3 = CBL(1024, 512, 1, 1)
        # SPP: stride-1 max pools padded so the spatial size is unchanged.
        self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=5 // 2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=9 // 2)
        self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=13 // 2)
        # ConvSet2 (input: 4 x 512 channels from the SPP concatenation)
        self.conv4 = CBL(2048, 512, 1, 1)
        self.conv5 = CBL(512, 1024, 3, 1)
        self.conv6 = CBL(1024, 512, 1, 1)
        # Channel reduction before the first upsampling.
        self.conv7 = CBL(512, 256, 1, 1)
        self.upsample1 = Upsample()
        # Lateral 1x1 convolution on the ``csp4`` input.
        self.conv8 = CBL(512, 256, 1, 1)
        # ConvSet3
        self.conv9 = CBL(512, 256, 1, 1)
        self.conv10 = CBL(256, 512, 3, 1)
        self.conv11 = CBL(512, 256, 1, 1)
        self.conv12 = CBL(256, 512, 3, 1)
        self.conv13 = CBL(512, 256, 1, 1)
        # Channel reduction before the second upsampling.
        self.conv14 = CBL(256, 128, 1, 1)
        self.upsample2 = Upsample()
        # Lateral 1x1 convolution on the ``csp3`` input.
        self.conv15 = CBL(256, 128, 1, 1)
        # ConvSet4
        self.conv16 = CBL(256, 128, 1, 1)
        self.conv17 = CBL(128, 256, 3, 1)
        self.conv18 = CBL(256, 128, 1, 1)
        self.conv19 = CBL(128, 256, 3, 1)
        self.conv20 = CBL(256, 128, 1, 1)

    def forward(self, input: torch.Tensor, csp4: torch.Tensor, csp3: torch.Tensor):
        """Fuse three feature maps.

        Args:
            input: Deepest feature map; ``conv1`` expects 1024 channels.
            csp4: Feature map merged after the first upsampling; ``conv8``
                expects 512 channels.
            csp3: Feature map merged after the second upsampling; ``conv15``
                expects 256 channels.

        Returns:
            Tuple ``(x20, x13, x6)`` with 128, 256 and 512 channels, at the
            resolutions of ``csp3``, ``csp4`` and ``input`` respectively.
        """
        # ConvSet1
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        # SPP. Fix: each branch uses its own pooling layer; previously
        # maxpool1 was applied three times and maxpool2/maxpool3 were unused.
        m1 = self.maxpool1(x3)
        m2 = self.maxpool2(x3)
        m3 = self.maxpool3(x3)
        spp = torch.cat([m3, m2, m1, x3], dim=1)
        # SPP end
        # ConvSet2
        x4 = self.conv4(spp)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        # Upsample to the spatial size of csp4.
        up = self.upsample1(x7, csp4.size(), self.inference)
        # Lateral branch from the stride-16 backbone stage.
        x8 = self.conv8(csp4)
        x8 = torch.cat([x8, up], dim=1)
        # ConvSet3
        x9 = self.conv9(x8)
        x10 = self.conv10(x9)
        x11 = self.conv11(x10)
        x12 = self.conv12(x11)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)
        # Upsample to the spatial size of csp3.
        up = self.upsample2(x14, csp3.size(), self.inference)
        # Lateral branch from the stride-8 backbone stage.
        x15 = self.conv15(csp3)
        x15 = torch.cat([x15, up], dim=1)
        # ConvSet4
        x16 = self.conv16(x15)
        x17 = self.conv17(x16)
        x18 = self.conv18(x17)
        x19 = self.conv19(x18)
        x20 = self.conv20(x19)
        return x20, x13, x6
class YoloLayer(nn.Module):
    """Placeholder for the YOLO decoding layer; it only records its settings.

    No ``forward`` is defined yet, so calling an instance raises
    ``NotImplementedError`` from ``nn.Module``.

    Args:
        anchor_mask: Indices of the anchors handled by this layer.
        num_classes: Number of object classes.
        anchors: Flat list of anchor sizes shared by all layers.
        num_anchors: Total number of anchors described by ``anchors``.
        stride: Down-sampling factor of the feature map this layer receives.
        model_out: Stored as given; its meaning is not defined in this file.
    """

    def __init__(self,
                 anchor_mask=None,
                 num_classes=0,
                 anchors=None,
                 num_anchors=1,
                 stride=32,
                 model_out=False):
        # Fix: without this call the instance lacks nn.Module's internal state
        # (parameters(), state_dict(), __call__ all fail).
        super().__init__()
        # Fix: ``None`` sentinels replace the mutable ``[]`` defaults, which
        # would have been shared between instances. Inputs are copied.
        self.anchor_mask = [] if anchor_mask is None else list(anchor_mask)
        self.num_classes = num_classes
        self.anchors = [] if anchors is None else list(anchors)
        self.num_anchors = num_anchors
        self.stride = stride
        self.model_out = model_out
def get_region_boxes(inputs):
    """Placeholder for merging per-scale YOLO outputs; not implemented yet.

    Args:
        inputs: Outputs of the YOLO layers; currently ignored.

    Returns:
        Always ``None``.
    """
    return None
Head 模块完成
在 Head 部分,主要是将低层和高层融合后得到的特征图再做下采样,与高层(低分辨率)的输出融合后进行输出,这里的检测输出使用 1×1 卷积。
class Yolov4Head(nn.Module):
    """Detection head: three 1x1 detector convolutions joined by a bottom-up
    path that down-samples the finer map and merges it with the coarser one.

    Args:
        output_ch: Channel count of each detector output.
        n_classes: Number of object classes, forwarded to the YOLO layers.
        anchors: Anchor sizes, forwarded to the YOLO layers.
        inference: When true, ``forward`` decodes the raw outputs with the YOLO
            layers and ``get_region_boxes``; otherwise it returns the raw maps.
    """

    def __init__(self, output_ch, n_classes, anchors, inference=False):
        super().__init__()
        self.inference = inference
        self.anchors = anchors
        # Finest scale (52x52 for a 416x416 network input).
        self.conv1 = CBL(128, 256, 3, 1)
        # 1x1 detector convolution: no BN, no activation, with bias.
        self.conv2 = Conv(256, output_ch, 1, 1, bn=False, bias=True)
        self.yolo1 = YoloLayer(
            anchor_mask=[0, 1, 2],
            num_classes=n_classes,
            anchors=anchors,
            num_anchors=9, stride=8
        )
        # Stride-2 down-sampling of the finest map.
        self.conv3 = CBL(128, 256, 3, 2)
        # ConvSet5 (input: 256 down-sampled + 256 from the neck)
        self.conv4 = CBL(512, 256, 1, 1)
        self.conv5 = CBL(256, 512, 3, 1)
        self.conv6 = CBL(512, 256, 1, 1)
        self.conv7 = CBL(256, 512, 3, 1)
        self.conv8 = CBL(512, 256, 1, 1)
        self.conv9 = CBL(256, 512, 3, 1)
        # Middle-scale detector (26x26). Fix: conv9 outputs 512 channels, so
        # the detector must accept 512 (was 256).
        self.conv10 = Conv(512, output_ch, 1, 1, bn=False, bias=True)
        self.yolo2 = YoloLayer(
            anchor_mask=[3, 4, 5],
            num_classes=n_classes,
            anchors=anchors,
            num_anchors=9, stride=16
        )
        # Stride-2 down-sampling of the middle-scale features.
        self.conv11 = CBL(256, 512, 3, 2)
        # ConvSet6 (input: 512 down-sampled + 512 from the neck)
        self.conv12 = CBL(1024, 512, 1, 1)
        self.conv13 = CBL(512, 1024, 3, 1)
        self.conv14 = CBL(1024, 512, 1, 1)
        self.conv15 = CBL(512, 1024, 3, 1)
        self.conv16 = CBL(1024, 512, 1, 1)
        self.conv17 = CBL(512, 1024, 3, 1)
        # Coarsest-scale detector (13x13). Fix: this was assigned to
        # ``self.conv10`` again, overwriting the middle-scale detector and
        # leaving ``self.conv18`` (used in forward) undefined.
        self.conv18 = Conv(1024, output_ch, 1, 1, bn=False, bias=True)
        self.yolo3 = YoloLayer(
            anchor_mask=[6, 7, 8],
            num_classes=n_classes,
            anchors=anchors,
            num_anchors=9, stride=32
        )

    def forward(self, input1, input2, input3):
        """Compute the three detector outputs.

        Args:
            input1: Finest feature map; ``conv1`` expects 128 channels.
            input2: Middle feature map; 256 channels are expected so that the
                concatenation feeding ``conv4`` has 512.
            input3: Coarsest feature map; 512 channels are expected so that the
                concatenation feeding ``conv12`` has 1024.

        Returns:
            ``[x2, x10, x18]`` (raw detector maps, finest first) when
            ``self.inference`` is false; otherwise the result of
            ``get_region_boxes`` on the three YOLO layer outputs.
        """
        x1 = self.conv1(input1)
        x2 = self.conv2(x1)

        x3 = self.conv3(input1)
        x3 = torch.cat([x3, input2], dim=1)
        # ConvSet5
        x4 = self.conv4(x3)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        x8 = self.conv8(x7)
        x9 = self.conv9(x8)
        x10 = self.conv10(x9)

        # Fix: down-sample the 256-channel feature map x8, not the detector
        # output x10 (which has output_ch channels and does not fit conv11).
        x11 = self.conv11(x8)
        x11 = torch.cat([x11, input3], dim=1)
        # ConvSet6
        x12 = self.conv12(x11)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)
        x15 = self.conv15(x14)
        x16 = self.conv16(x15)
        x17 = self.conv17(x16)
        x18 = self.conv18(x17)

        if self.inference:
            y1 = self.yolo1(x2)
            y2 = self.yolo2(x10)
            y3 = self.yolo3(x18)
            return get_region_boxes([y1, y2, y3])
        return [x2, x10, x18]
class Yolov4(nn.Module):
    """Full YOLOv4 network: CSP backbone, neck and detection head.

    Args:
        yolov4conv137weight: Accepted for interface compatibility; this
            constructor does not use it.
        n_classes: Number of object classes.
        inference: Forwarded to the neck and the head to select their
            inference code paths.
        anchors: Flat sequence of anchor sizes passed to the head. Defaults to
            ``DEFAULT_ANCHORS``.
    """

    # Anchor sizes (flattened width/height pairs) from the reference darknet
    # yolov4.cfg; used when the caller does not supply any.
    DEFAULT_ANCHORS = (12, 16, 19, 36, 40, 28, 36, 75, 76, 55,
                       72, 146, 142, 110, 192, 243, 459, 401)

    def __init__(self, yolov4conv137weight=None, n_classes=80, inference=False, anchors=None):
        super().__init__()
        # Channels per detector output: (4 box values + 1 objectness +
        # n_classes) for each of the 3 anchors handled at every scale.
        output_ch = (4 + 1 + n_classes) * 3
        if anchors is None:
            anchors = self.DEFAULT_ANCHORS

        # backbone
        self.csp1 = CSP1()
        self.csp2 = CSP(64)
        self.csp3 = CSP(128)
        self.csp4 = CSP(256)
        self.csp5 = CSP(512)
        # neck
        self.neck = Neck(inference)
        # head. Fix: ``inference`` used to be passed positionally into the
        # ``anchors`` slot, so the head received a bool as anchors and its own
        # ``inference`` flag stayed False.
        self.head = Yolov4Head(output_ch, n_classes, list(anchors), inference=inference)

    def forward(self, input):
        """Run the backbone, neck and head on ``input`` and return the head's output."""
        d1 = self.csp1(input)
        d2 = self.csp2(d1)
        d3 = self.csp3(d2)
        d4 = self.csp4(d3)
        d5 = self.csp5(d4)

        # The neck takes the three deepest backbone maps, deepest first.
        x20, x13, x6 = self.neck(d5, d4, d3)
        output = self.head(x20, x13, x6)
        return output