自己动手写CNN Inference框架之 (四) avgpool

今天我们给大家介绍的是CNN中一个重要的组成模块---池化层，也叫pooling。本文我们跟大家一起讨论如何一步一步地实现平均池化层。大家在实现的过程中，也可以尝试结合矩阵运算的思路去思考整个过程，这里暂时不考虑性能上的问题。同样地，平均池化层也可以拆分成img2col重排数据、矩阵相乘两个步骤进行实现。
首发：https://zhuanlan.zhihu.com/p/73062296
作者：张新栋

用一种非常平凡的实现方式，平均池化层（avgpool）即用滑动窗口的方式移动池化mask，然后计算mask覆盖到的元素集合的平均值，以此类推重复所有通道及区域。实现方式如下代码，我们以2x2的池化层为例，3x3的同理。

/**
 * Average pooling over a single-batch tensor laid out as NCHW.
 *
 * For every channel and every output position, the function averages the
 * input elements covered by a size_h x size_w window whose top-left corner
 * sits at (oh * stride_h - pad_h, ow * stride_w - pad_w).
 *
 * @param input    source tensor; batch must be 1
 * @param output   destination tensor; batch must be 1, same channel count as
 *                 input, and spatial size equal to what getOutputWidth /
 *                 getOutputHeight report for the given parameters
 * @param size_h   pooling window height
 * @param size_w   pooling window width
 * @param stride_h vertical step between consecutive windows
 * @param stride_w horizontal step between consecutive windows
 * @param pad_h    rows of implicit padding above the input
 * @param pad_w    columns of implicit padding left of the input
 *
 * Window positions that fall outside the input are skipped and excluded from
 * the divisor, so padded cells never dilute the average.
 * NOTE(review): this matches the usual "exclude padding" convention; confirm
 * it is the convention the rest of the framework expects once non-zero
 * padding is actually produced by the config parser.
 */
void AvgPooling(
    Tensor *input, Tensor* output, 
    int size_h, int size_w, 
    int stride_h, int stride_w, 
    int pad_h, int pad_w
)
{
    assert(input  != NULL);
    assert(output != NULL);
    assert(getBatch(input)  == 1);
    assert(getBatch(output) == 1);
    assert(getChannel(input) == getChannel(output));

    const int IW = getWidth(input);
    const int IH = getHeight(input);
    const int OW = getWidth(output);
    const int OH = getHeight(output);
    const int OC = getChannel(output);

    assert(OW == getOutputWidth(IW, size_w, stride_w, pad_w));
    assert(OH == getOutputHeight(IH, size_h, stride_h, pad_h));

    int oc, oh, ow, kh, kw;

    /* One plain loop over all channels: the previous 4-way "unrolled" loop
     * advanced oc by 1 while addressing channels oc..oc+3, so it re-processed
     * overlapping channels and skipped some entirely whenever OC > 4. */
    for (oc = 0; oc < OC; ++oc)
    {
        const float* in_plane  = input->data  + oc * IH * IW;
        float*       out_plane = output->data + oc * OH * OW;

        for (oh = 0; oh < OH; ++oh)
        {
            const int ih0 = oh * stride_h - pad_h;

            for (ow = 0; ow < OW; ++ow)
            {
                const int iw0 = ow * stride_w - pad_w;
                float sum   = 0.0f;
                int   count = 0;

                for (kh = 0; kh < size_h; ++kh)
                {
                    const int ih = ih0 + kh;
                    if (ih < 0 || ih >= IH)
                        continue;   /* row lies in the padding area */

                    for (kw = 0; kw < size_w; ++kw)
                    {
                        const int iw = iw0 + kw;
                        if (iw < 0 || iw >= IW)
                            continue;   /* column lies in the padding area */

                        sum += in_plane[ih * IW + iw];
                        ++count;
                    }
                }

                /* count is 0 only if the whole window is outside the input. */
                out_plane[oh * OW + ow] = (count > 0) ? sum / (float) count : 0.0f;
            }
        }
    }
}

解析tensorflow的avgpool参数

我们完成了avgpool的开发后，第二步希望能提取tensorflow中对应op的参数，对齐数据格式后进行数值验证。需要再强调的是，我们采用的数据排布格式为NCHW，这一点与tensorflow的默认输入格式不同。下面是提取avgpool参数的python脚本，我们其实只关注avgpool的mask的宽、高，水平滑动和竖直滑动的步长，还有padding的数值。

#coding=utf-8
import tensorflow as tf
import numpy as np


# Test input: values 0..47 arranged as NCHW (1, 3, 4, 4), then transposed to
# NHWC because that is the layout the placeholder below is declared with.
a = np.arange(48)
a = np.reshape(a, (1, 3, 4, 4))
a = np.transpose(a, (0, 2, 3, 1))

## build graph
input_tensor = tf.placeholder(tf.float32, shape=(1, 4, 4, 3), name="input")
avg2d = tf.layers.average_pooling2d(
    input_tensor, pool_size=2, strides=2, padding='valid')

## run the op and dump its attributes
with tf.Session() as sess:
    # The graph contains no variables, so no initializer needs to run.
    graph_def = tf.get_default_graph().as_graph_def(add_shapes=True)
    result = sess.run(avg2d, feed_dict={input_tensor: a})
    # Back to NCHW so the printout can be compared with the C implementation.
    result = np.transpose(result, (0, 3, 1, 2))
    print(result)

# Average pooling has no weights; the values file is still created (empty) so
# every op in the series produces the same pair of files.
with open("./models/3_values.bat", "w"):
    pass

with open("./models/3_config.bat", "w") as config_file:
    for n in graph_def.node:
        if n.name != "average_pooling2d/AvgPool":
            continue
        attrs = n.attr
        # String attributes are stored as bytes; decode before joining with
        # text so the script also works on Python 3.
        padding = attrs['padding'].s.decode("utf-8")
        data_format = attrs['data_format'].s.decode("utf-8")
        # Plain Python ints: no uint8 cast that would wrap values above 255.
        ksize = list(attrs['ksize'].list.i)
        strides = list(attrs['strides'].list.i)
        # Indices 1 and 2 are written as the window height/width and the
        # vertical/horizontal stride (the exported data_format is NHWC).
        lines = [
            "AVG_POOL",
            str(ksize[1]),
            str(ksize[2]),
            padding,
            str(strides[1]),
            str(strides[2]),
            data_format,
        ]
        config_file.write("\n".join(lines) + "\n")

基于C的模型解析和测试

保存好的参数如下所示，需要注意的是我们虽然生成了values的文件，但是avgpool中没有权值参数。生成的config文件如下所示：

AVG_POOL
2
2
VALID
2
2
NHWC

第一个参数为Op的类型，第二、三个为pooling-mask的高和宽，第四个参数为pooling的padding方式（有valid和same两种），第五、六个参数为stride_h和stride_w，最后一个为数据的格式。我们对其进行相应的参数解析，如下代码：

/* Reads the next line of `file` and returns it parsed as a decimal integer.
 * Returns 0 when no further line can be read (atoi also yields 0 for text
 * that does not start with a number). */
static int readIntLine(FILE* file)
{
    char line[32];
    if (fgets(line, sizeof(line), file) == NULL)
        return 0;
    return atoi(line);
}

/* Consumes the next line of `file` without interpreting it. */
static void skipLine(FILE* file)
{
    char line[32];
    if (fgets(line, sizeof(line), file) == NULL)
    {
        /* End of file: nothing left to skip. */
    }
}

/**
 * Parses an op config file and fills `opConfig` accordingly.
 *
 * The first line names the op type; the remaining lines depend on it:
 *   CONV2D   : N, C, H, W, padding, stride_h, stride_w, data format
 *   AVG_POOL : ksize_h, ksize_w, padding, stride_h, stride_w, data format
 *   DENSE    : H, W   (dims are stored as {1, 1, H, W})
 *
 * The padding line ("SAME"/"VALID") is read but both modes currently map to
 * zero padding; the data-format line is read and ignored.
 *
 * @param opConfig destination structure; must not be NULL
 * @param filename path of the config file
 *
 * If the file cannot be opened or is empty, a message is written to stderr
 * and `opConfig` is left unmodified. The same happens for an unknown op type.
 */
void extractOpConfig(OpConfig* opConfig, const char* filename)
{
    assert(opConfig != NULL);

    FILE* file = fopen(filename, "r");
    if (file == NULL) {
        fprintf(stderr, "extractOpConfig: cannot open config file\n");
        return;
    }

    char type[20];
    if (fgets(type, sizeof(type), file) == NULL) {
        fprintf(stderr, "extractOpConfig: config file is empty\n");
        fclose(file);
        return;
    }

    if (strstr(type, "CONV2D") != NULL) {

        /* Lines are consumed strictly in file order. */
        const int N = readIntLine(file);
        const int C = readIntLine(file);
        const int H = readIntLine(file);
        const int W = readIntLine(file);

        skipLine(file);   /* padding mode; SAME is not implemented yet */

        const int stride_h = readIntLine(file);
        const int stride_w = readIntLine(file);

        skipLine(file);   /* data format */

        opConfig->type     = CONV2D;
        opConfig->dims[0]  = N;
        opConfig->dims[1]  = C;
        opConfig->dims[2]  = H;
        opConfig->dims[3]  = W;
        opConfig->pad_w    = 0;
        opConfig->pad_h    = 0;
        opConfig->stride_w = stride_w;
        opConfig->stride_h = stride_h;

    } else if (strstr(type, "AVG_POOL") != NULL) {

        const int H = readIntLine(file);
        const int W = readIntLine(file);

        skipLine(file);   /* padding mode; SAME is not implemented yet */

        const int stride_h = readIntLine(file);
        const int stride_w = readIntLine(file);

        skipLine(file);   /* data format */

        opConfig->type     = AVG_POOL;
        opConfig->ksize_h  = H;
        opConfig->ksize_w  = W;
        opConfig->pad_h    = 0;
        opConfig->pad_w    = 0;
        opConfig->stride_h = stride_h;
        opConfig->stride_w = stride_w;

    } else if (strstr(type, "DENSE") != NULL) {

        const int H = readIntLine(file);
        const int W = readIntLine(file);

        opConfig->type     = DENSE;
        opConfig->dims[0]  = 1;
        opConfig->dims[1]  = 1;
        opConfig->dims[2]  = H;
        opConfig->dims[3]  = W;

    } else {
        fprintf(stderr, "extractOpConfig: unknown op type: %s", type);
    }

    fclose(file);
}

下面我们需要写业务代码进行模型测试，逻辑很简单，首先解析模型的config文件，然后根据config的参数初始化输入输出、执行avgpool的操作，最后跟python的输出结果进行精确性的对比。

/**
 * Smoke test for AvgPooling.
 *
 * Loads the pooling parameters from ./models/3_config.bat, fills a
 * 1x3x4x4 input with 0, 1, 2, ..., runs AvgPooling into a 1x3x2x2 output,
 * prints the output and the time spent inside AvgPooling.
 */
void test_avg_pool()
{
    Tensor* input  = NULL;
    Tensor* output = NULL;
    OpConfig* config = (OpConfig*) malloc(sizeof(OpConfig));
    if (config == NULL)
        return;

    extractOpConfig (config, "./models/3_config.bat");

    // init input
    // NOTE(review): input/output are NULL here and handed to initTensor by
    // value; this only works if initTensor takes the pointer by reference or
    // is a macro -- confirm against its declaration.
    int input_dims[4] = {1,3,4,4};
    initTensor(input, input_dims, 0.0f);
    for (int i = 0; i < input->nums; ++i)
        input->data[i] = i;

    // init output
    int output_dims[4] = {1,3,2,2};
    initTensor(output, output_dims, 0.0f);

    // average pooling; only the op itself is timed, not the printing
    clock_t start = clock();
    AvgPooling(
      input, output, 
      config->ksize_h, config->ksize_w, 
      config->stride_h, config->stride_w, 
      config->pad_h, config->pad_w
    );
    clock_t end   = clock();
    float duration = (float)(end - start) / CLOCKS_PER_SEC;

    printTensor(output);
    printf("AvgPooling took %f s\n", duration);

    // free Tensor
    freeTensor(input);
    freeTensor(output);
    free(config);
}

最后

本文我们对应tensorflow中的平均池化操作实现了简单的avgpool操作，然后用python脚本解析tensorflow的graph模型中的avgpool的参数，并保存成自定义的文件格式；最后按照自定义的文件格式，读取和解析Op及Op对应参数，在自己的框架中实现avgpool的数值计算，跟tensorflow的数值计算结果一致，后续我们也会将本教程代码的github链接同步上来。

欢迎大家留言讨论、关注专栏，谢谢大家！

推荐阅读

专注嵌入式端的AI算法实现，欢迎关注作者微信公众号和知乎嵌入式AI算法实现专栏。

更多嵌入式AI相关的技术文章请关注极术嵌入式AI专栏

基于C的模型解析和测试

最后

推荐阅读

目录