自己写 YOLOv4 模型实现(Pytorch 版本)(1)

携手创作，共同成长！这是我参与「日新计划 · 8 月更文挑战」的第 8 天，点击查看活动详情

前言

选择一个有代表性的网络结构，然后从头到尾实现一遍，这样才会对这个网络结构有比较深的印象，对同类的其他网络结构也会有感觉。带着这种想法，今天就自己动手实现一个完整的 YOLOv4 网络。

这里是在 GitHub 上选择了一个已经实现好的 PyTorch 版本项目，下载下来做参考，主要是先模仿，然后再去优化，形成自己的东西。

import torch
import torch.nn as nn
import torch.nn.functional as F

Conv 基类

这个网络基于卷积，通常我们提到的卷积块会包含一个卷积层、BN 层和激活层。根据激活层的不同，模型中的卷积块分为 CBM 和 CBL，它们分别使用的激活函数是 Mish 和 LeakyReLU。

class Conv(nn.Module):
    """Base convolution block: a Conv2d optionally followed by BatchNorm2d.

    The layers live in the ``conv`` ModuleList and are applied in order, so
    subclasses can append an activation to the end of that list.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: square kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: convolution stride.
        bn: append a BatchNorm2d layer after the convolution when truthy.
        bias: give the convolution a learnable bias when truthy.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, bn=True, bias=False):
        super().__init__()
        padding = (kernel_size - 1) // 2
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding,
                      bias=bool(bias)),
        ]
        if bn:
            layers.append(nn.BatchNorm2d(out_channels))
        self.conv = nn.ModuleList(layers)

    def forward(self, x):
        """Run ``x`` through every layer of ``self.conv`` in order."""
        out = x
        for layer in self.conv:
            out = layer(out)
        return out

填充大小按下式计算（卷积核为奇数、步长为 1 时可保持特征图尺寸不变）：$pad = \left\lfloor \frac{kernel\_size - 1}{2} \right\rfloor$

实现 CBM

Mish 激活函数

class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``, applied element-wise."""

    def forward(self, x):
        """Return the Mish activation of ``x``."""
        gate = torch.tanh(F.softplus(x))
        return x * gate

class CBM(Conv):
    """Conv block followed by a Mish activation (Conv2d [+ BatchNorm2d] + Mish).

    Takes the same constructor arguments as ``Conv``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, bn=True, bias=False):
        super().__init__(in_channels, out_channels, kernel_size, stride, bn, bias)
        # The activation goes last so the inherited forward applies it after conv/BN.
        activation = Mish()
        self.conv.append(activation)

CBL

在 CBL 块中使用 LeakyReLU 激活函数

class CBL(Conv):
    """Conv block followed by LeakyReLU (Conv2d [+ BatchNorm2d] + LeakyReLU).

    Takes the same constructor arguments as ``Conv``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, bn=True, bias=False):
        super().__init__(in_channels, out_channels, kernel_size, stride, bn, bias)
        # Negative slope 0.1, computed in place; appended last so it runs after conv/BN.
        activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
        self.conv.append(activation)

ResBlock

class ResBlock(nn.Module):
    """Sequence of residual units, each a 1x1 CBM followed by a 3x3 CBM.

    Args:
        ch: channel count; every unit maps ``ch`` channels to ``ch`` channels.
        nblocks: number of residual units to stack.
        shortcut: when True, each unit's input is added to its output;
            otherwise the unit output replaces the input.
    """

    def __init__(self, ch, nblocks=1, shortcut=True):
        super().__init__()
        self.shortcut = shortcut
        # Nested ModuleLists keep parameter names as module_list.<unit>.<layer>.
        self.module_list = nn.ModuleList()
        for _ in range(nblocks):
            unit = nn.ModuleList([CBM(ch, ch, 1, 1), CBM(ch, ch, 3, 1)])
            self.module_list.append(unit)

    def forward(self, x):
        """Apply every residual unit in order and return the result."""
        # Fix: the units are stored on the instance; the bare name
        # ``module_list`` raised NameError on the first forward pass.
        for unit in self.module_list:
            h = x
            for layer in unit:
                h = layer(h)
            x = x + h if self.shortcut else h
        return x

class Upsample(nn.Module):
    """Upsample a 4-D feature map to the spatial size given by ``target_size``.

    Only ``target_size[2]`` (height) and ``target_size[3]`` (width) are read.
    """

    def forward(self, x, target_size, inference=False):
        """Return ``x`` resized to ``(target_size[2], target_size[3])``.

        With ``inference`` False this is nearest-neighbour ``F.interpolate``.
        With ``inference`` True each pixel is replicated with
        view/expand/view instead, which requires the target height and width
        to be integer multiples of the input's.
        """
        assert (x.data.dim() == 4)
        out_h, out_w = target_size[2], target_size[3]
        if not inference:
            return F.interpolate(x, size=(out_h, out_w), mode='nearest')

        batch, channels = x.size(0), x.size(1)
        in_h, in_w = x.size(2), x.size(3)
        # Insert singleton axes after H and W, broadcast them to the scale
        # factors, then fold them back into H and W.
        tiled = x.view(batch, channels, in_h, 1, in_w, 1).expand(
            batch, channels, in_h, out_h // in_h, in_w, out_w // in_w
        )
        return tiled.contiguous().view(batch, channels, out_h, out_w)

在推理过程中，是将特征图通过 view 和 expand 操作按倍数复制扩展到目标尺寸，而不是调用 F.interpolate；非推理（训练）时则使用最近邻插值 F.interpolate。下面用一个例子验证上采样后的尺寸：

# Demo: upsample a 13x13 feature map to the spatial size of a 26x26 one.
backbone_output = torch.randn(2,256,13,13)
backbone_output.shape  # bare expression: only displays a value in an interactive session
backbone_output_2 = torch.randn(2,256,26,26)
upsample = Upsample()
# Only the size of the second tensor is passed; inference is left at its default (False).
upsample_output = upsample(backbone_output,backbone_output_2.size())
print(upsample_output.shape) #torch.Size([2, 256, 26, 26])

由于 CSP1 比较特殊（包含最开始的卷积层，且通道数的变化与后面的模块不同），所以拿出来单独实现，其他的 CSP 模块结构相同，统一由 CSP 类实现。

class CSP1(nn.Module):
    """Stem convolution plus the first CSP downsampling stage.

    Takes a 3-channel image and returns a 64-channel feature map at half the
    input resolution (416x416x3 -> 208x208x64 for a 416x416 input).
    """

    def __init__(self):
        # Fix: nn.Module must be initialised before sub-modules are assigned,
        # otherwise the first ``self.conv1 = ...`` raises AttributeError.
        super().__init__()
        # input 416 x 416 x 3 -> 416 x 416 x 32
        self.conv1 = CBM(3, 32, 3, 1)
        # 416 x 416 x 32 -> 208 x 208 x 64 (stride-2 downsample)
        self.conv2 = CBM(32, 64, 3, 2)
        # 208 x 208 x 64 -> 208 x 208 x 64 (first CSP branch)
        self.conv3 = CBM(64, 64, 1, 1)
        # [route] layers = -2: second CSP branch, also fed from conv2
        self.conv4 = CBM(64, 64, 1, 1)
        # Residual unit: 1x1 bottleneck then 3x3 convolution.
        self.conv5 = CBM(64, 32, 1, 1)
        # Fix: 3x3 kernel, matching the reference yolov4.cfg residual unit
        # (it was 1x1, which left this stage without any 3x3 convolution).
        self.conv6 = CBM(32, 64, 3, 1)
        # [shortcut] from=-3, activation = linear
        # Transition convolution after the residual unit.
        self.conv7 = CBM(64, 64, 1, 1)
        # [route] layers = -1, -7: fuse the two 64-channel branches (128 -> 64)
        self.conv8 = CBM(128, 64, 1, 1)

    def forward(self, input):
        """Return the stage output for an image batch of shape (N, 3, H, W)."""
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        # route -2: the second branch starts again from the conv2 output
        x4 = self.conv4(x2)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        # shortcut -3: residual connection around conv5/conv6
        x6 = x6 + x4
        # transition
        x7 = self.conv7(x6)
        # route -1, -7
        # Fix: concatenate along the channel axis; the default dim=0 stacked
        # the batch dimension and handed conv8 64 channels instead of 128.
        x7 = torch.cat([x7, x3], dim=1)
        x8 = self.conv8(x7)
        return x8

class CSP(nn.Module):
    """Generic CSP downsampling stage.

    Halves the spatial resolution and doubles the channel count:
    ``in_channels`` -> ``2 * in_channels``.

    Args:
        in_channels: number of channels of the incoming feature map.
        nblocks: number of residual units in the residual branch. The default
            of 2 reproduces the previously hard-coded depth.
    """

    def __init__(self, in_channels, nblocks=2):
        # Fix: nn.Module must be initialised before sub-modules are assigned.
        super().__init__()
        # Stride-2 downsample that also doubles the channels.
        self.conv1 = CBM(in_channels, in_channels * 2, 3, 2)
        # [route] -2
        # Two 1x1 convolutions split the features into the two CSP branches.
        self.conv2 = CBM(in_channels * 2, in_channels, 1, 1)
        self.conv3 = CBM(in_channels * 2, in_channels, 1, 1)
        self.resblock = ResBlock(ch=in_channels, nblocks=nblocks)
        # [shortcut] -3: transition convolution after the residual units
        self.conv4 = CBM(in_channels, in_channels, 1, 1)
        # [route] -1 -10: fuse the concatenated branches
        self.conv5 = CBM(in_channels * 2, in_channels * 2, 1, 1)

    def forward(self, input):
        """Return the stage output for ``input`` of shape (N, in_channels, H, W)."""
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)
        r = self.resblock(x3)
        x4 = self.conv4(r)
        # Concatenate the residual branch with the skip branch on channels.
        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5

Neck 模块实现

class Neck(nn.Module):
    """YOLOv4 neck: SPP on the deepest feature map plus a top-down fusion path.

    Args:
        inference: forwarded to the Upsample modules to select the
            expand-based upsampling path instead of ``F.interpolate``.
    """

    def __init__(self, inference=False):
        # Fix: nn.Module must be initialised before sub-modules are assigned.
        super().__init__()
        # Whether the model runs in inference mode.
        self.inference = inference
        # convSet1
        self.conv1 = CBL(1024, 512, 1, 1)
        self.conv2 = CBL(512, 1024, 3, 1)
        self.conv3 = CBL(1024, 512, 1, 1)
        # SPP: stride-1 max-pools whose padding preserves the spatial size
        self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=5 // 2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=9 // 2)
        self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=13 // 2)
        # convSet2 (input is 4 x 512 = 2048 channels after the SPP concat)
        self.conv4 = CBL(2048, 512, 1, 1)
        self.conv5 = CBL(512, 1024, 3, 1)
        self.conv6 = CBL(1024, 512, 1, 1)
        # Channel reduction before the first upsample
        self.conv7 = CBL(512, 256, 1, 1)
        # UP
        self.upsample1 = Upsample()
        # R 85: lateral 1x1 convolution on the csp4 feature map
        self.conv8 = CBL(512, 256, 1, 1)
        # convSet3
        self.conv9 = CBL(512, 256, 1, 1)
        self.conv10 = CBL(256, 512, 3, 1)
        self.conv11 = CBL(512, 256, 1, 1)
        self.conv12 = CBL(256, 512, 3, 1)
        self.conv13 = CBL(512, 256, 1, 1)
        # Channel reduction before the second upsample
        self.conv14 = CBL(256, 128, 1, 1)
        self.upsample2 = Upsample()
        # Lateral 1x1 convolution on the csp3 feature map
        self.conv15 = CBL(256, 128, 1, 1)
        # convSet4
        self.conv16 = CBL(256, 128, 1, 1)
        self.conv17 = CBL(128, 256, 3, 1)
        self.conv18 = CBL(256, 128, 1, 1)
        self.conv19 = CBL(128, 256, 3, 1)
        self.conv20 = CBL(256, 128, 1, 1)

    def forward(self, input, csp4, csp3):
        """Fuse the three backbone feature maps.

        Args:
            input: deepest backbone feature map (1024 channels).
            csp4: feature map one stage shallower (512 channels).
            csp3: feature map two stages shallower (256 channels).

        Returns:
            Tuple ``(x20, x13, x6)`` with 128, 256 and 512 channels, ordered
            from the highest to the lowest spatial resolution.
        """
        # convSet1
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        # SPP
        # Fix: use the three distinct pooling windows (5, 9, 13); previously
        # maxpool1 was applied three times and maxpool2/maxpool3 were unused.
        m1 = self.maxpool1(x3)
        m2 = self.maxpool2(x3)
        m3 = self.maxpool3(x3)
        spp = torch.cat([m3, m2, m1, x3], dim=1)
        # SPP end
        # convSet2
        x4 = self.conv4(spp)
        x5 = self.conv5(x4)
        # Lowest-resolution output (512 channels; 13x13 for a 416x416 input)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        # UP: match the spatial size of csp4
        up = self.upsample1(x7, csp4.size(), self.inference)
        # downsample4, stride 16
        x8 = self.conv8(csp4)
        x8 = torch.cat([x8, up], dim=1)
        # convSet3
        x9 = self.conv9(x8)
        x10 = self.conv10(x9)
        x11 = self.conv11(x10)
        x12 = self.conv12(x11)
        # Mid-resolution output (256 channels; 26x26 for a 416x416 input)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)
        up = self.upsample2(x14, csp3.size(), self.inference)
        # downsample3, stride 8
        x15 = self.conv15(csp3)
        x15 = torch.cat([x15, up], dim=1)
        # convSet4
        x16 = self.conv16(x15)
        x17 = self.conv17(x16)
        x18 = self.conv18(x17)
        x19 = self.conv19(x18)
        # Highest-resolution output (128 channels; 52x52 for a 416x416 input)
        x20 = self.conv20(x19)
        return x20, x13, x6

class YoloLayer(nn.Module):
    """Placeholder for the YOLO detection layer.

    Only the configuration is stored; ``forward`` (box decoding) is not
    implemented yet, so calling the layer raises the base-class error.

    Args:
        anchor_mask: indices of the anchors handled by this layer.
        num_classes: number of object classes.
        anchors: anchor sizes shared by all detection layers.
        num_anchors: total number of anchors described by ``anchors``.
        stride: stride value associated with this layer.
        model_out: stored as given; not interpreted by this placeholder.
    """

    def __init__(self, anchor_mask=None,
                 num_classes=0,
                 anchors=None,
                 num_anchors=1,
                 stride=32,
                 model_out=False):
        # Fix: without this call the instance has no module state, so it
        # cannot be iterated, moved or called once registered on a parent.
        super().__init__()
        # Fix: ``None`` sentinels replace the mutable ``[]`` defaults, which
        # would be shared between every instance built without arguments.
        self.anchor_mask = [] if anchor_mask is None else anchor_mask
        self.anchors = [] if anchors is None else anchors
        # Keep the configuration instead of silently discarding it.
        self.num_classes = num_classes
        self.num_anchors = num_anchors
        self.stride = stride
        self.model_out = model_out

def get_region_boxes(inputs):
    """Placeholder for merging per-scale detections; not implemented yet.

    ``inputs`` is the list of YoloLayer outputs passed by Yolov4Head in
    inference mode. The function currently does nothing and returns None.
    """
    pass

Head 模块实现

在 Head 部分，主要是将低层和高层融合后得到的特征图做下采样，再与高层（低分辨率）的输出做融合后进行输出，最后的检测输出这里用 1×1 卷积。

class Yolov4Head(nn.Module):
    """YOLOv4 head: bottom-up fusion path and one 1x1 detector per scale.

    Args:
        output_ch: number of channels produced by each detector convolution.
        n_classes: number of object classes, forwarded to the YoloLayers.
        anchors: anchor sizes, forwarded to the YoloLayers.
        inference: when True, ``forward`` runs the YoloLayers and returns
            ``get_region_boxes`` of their outputs; otherwise it returns the
            raw detector feature maps.
    """

    def __init__(self, output_ch, n_classes, anchors, inference=False):
        super().__init__()
        self.inference = inference
        self.anchors = anchors
        # Highest-resolution branch (52x52 for a 416x416 input)
        self.conv1 = CBL(128, 256, 3, 1)
        # 1x1 conv detector: no BN, no activation, with bias
        self.conv2 = Conv(256, output_ch, 1, 1, bn=False, bias=True)
        self.yolo1 = YoloLayer(
                anchor_mask=[0, 1, 2],
                num_classes=n_classes,
                anchors=anchors,
                num_anchors=9, stride=8
            )
        # Downsample the high-resolution map to fuse with the mid one
        self.conv3 = CBL(128, 256, 3, 2)
        # convSet5
        self.conv4 = CBL(512, 256, 1, 1)
        self.conv5 = CBL(256, 512, 3, 1)
        self.conv6 = CBL(512, 256, 1, 1)
        self.conv7 = CBL(256, 512, 3, 1)
        self.conv8 = CBL(512, 256, 1, 1)
        self.conv9 = CBL(256, 512, 3, 1)
        # Mid-resolution detector (26x26 for a 416x416 input)
        # Fix: conv9 outputs 512 channels, so the detector takes 512 inputs
        # (it was declared with 256 and failed with a channel mismatch).
        self.conv10 = Conv(512, output_ch, 1, 1, bn=False, bias=True)
        self.yolo2 = YoloLayer(
                anchor_mask=[3, 4, 5],
                num_classes=n_classes,
                anchors=anchors,
                num_anchors=9, stride=16
            )
        # Downsample the mid-resolution map to fuse with the deepest one
        self.conv11 = CBL(256, 512, 3, 2)
        # convSet6
        self.conv12 = CBL(1024, 512, 1, 1)
        self.conv13 = CBL(512, 1024, 3, 1)
        self.conv14 = CBL(1024, 512, 1, 1)
        self.conv15 = CBL(512, 1024, 3, 1)
        self.conv16 = CBL(1024, 512, 1, 1)
        self.conv17 = CBL(512, 1024, 3, 1)
        # Lowest-resolution detector
        # Fix: this layer is conv18 (forward calls self.conv18); it was
        # assigned to self.conv10, overwriting the mid-resolution detector.
        self.conv18 = Conv(1024, output_ch, 1, 1, bn=False, bias=True)
        self.yolo3 = YoloLayer(
                anchor_mask=[6, 7, 8],
                num_classes=n_classes,
                anchors=anchors,
                num_anchors=9, stride=32
            )

    def forward(self, input1, input2, input3):
        """Compute the three detection outputs.

        Args:
            input1: highest-resolution neck output (128 channels).
            input2: mid-resolution neck output (256 channels).
            input3: lowest-resolution neck output (512 channels).

        Returns:
            ``[x2, x10, x18]`` (raw detector maps) when not in inference
            mode; otherwise the result of ``get_region_boxes``.
        """
        x1 = self.conv1(input1)
        x2 = self.conv2(x1)
        x3 = self.conv3(input1)
        x3 = torch.cat([x3, input2], dim=1)
        # convSet5
        x4 = self.conv4(x3)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        x8 = self.conv8(x7)
        x9 = self.conv9(x8)
        x10 = self.conv10(x9)
        # Fix: the downsample path continues from the 256-channel feature map
        # x8, not from the detector output x10 (which has output_ch channels).
        x11 = self.conv11(x8)
        x11 = torch.cat([x11, input3], dim=1)
        # convSet6
        x12 = self.conv12(x11)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)
        x15 = self.conv15(x14)
        x16 = self.conv16(x15)
        x17 = self.conv17(x16)
        x18 = self.conv18(x17)
        if self.inference:
            y1 = self.yolo1(x2)
            y2 = self.yolo2(x10)
            y3 = self.yolo3(x18)
            return get_region_boxes([y1, y2, y3])
        else:
            return [x2, x10, x18]

class Yolov4(nn.Module):
    """Full YOLOv4 network: CSP backbone, Neck and Yolov4Head.

    Args:
        yolov4conv137weight: accepted for interface compatibility; the code
            in this class does not use it.
        n_classes: number of object classes.
        inference: forwarded to the neck and the head to select their
            inference-time behaviour.
    """

    # Anchor (width, height) pairs, flattened, from the reference yolov4.cfg.
    DEFAULT_ANCHORS = (12, 16, 19, 36, 40, 28, 36, 75, 76, 55,
                       72, 146, 142, 110, 192, 243, 459, 401)

    def __init__(self, yolov4conv137weight=None, n_classes=80, inference=False):
        super().__init__()
        # Detector output channels: (4 box values + 1 objectness + class
        # scores) for each of the 3 anchors assigned to a detection scale.
        output_ch = (4 + 1 + n_classes) * 3
        # backbone
        # NOTE: the reference CSPDarknet53 uses 2, 8, 8 and 4 residual units
        # in these four stages; the calls below use CSP's default depth.
        self.csp1 = CSP1()
        self.csp2 = CSP(64)
        self.csp3 = CSP(128)
        self.csp4 = CSP(256)
        self.csp5 = CSP(512)
        # neck
        self.neck = Neck(inference)
        # Fix: pass real anchors and forward ``inference`` explicitly. The old
        # call Yolov4Head(output_ch, n_classes, inference) put the flag into
        # the ``anchors`` slot and left the head's inference at False.
        self.head = Yolov4Head(output_ch, n_classes,
                               list(self.DEFAULT_ANCHORS), inference)

    def forward(self, input):
        """Run backbone, neck and head on an image batch; return the head output."""
        d1 = self.csp1(input)
        d2 = self.csp2(d1)
        d3 = self.csp3(d2)
        d4 = self.csp4(d3)
        d5 = self.csp5(d4)
        # The neck consumes the three deepest backbone feature maps.
        x20, x13, x6 = self.neck(d5, d4, d3)
        output = self.head(x20, x13, x6)
        return output

声明：本站所有文章，如无特殊说明或标注，均为本站原创发布。任何个人或组织，在未征得本站同意时，禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益，可联系我们进行处理。