爱笑的小姐姐 · 2021年10月28日

Tengine推理引擎 | Graph源码解读

上篇文章,我们对Tengine的API做了源码解读

文章很长,大部分读者是不愿意花大量时间看的,这些我也都清楚。不过当我们在实际部署的时候难免会遇到各种各样问题,能提前对底层实现有所了解,遇到问题能够及时发现问题来源,还能提issue,和虫叔讨论,我觉得也是很不错的学习路线。

此外,在debug的时候,除了参考官方提供的文档资源,还可以参考本系列文章的讲解,当作额外的说明文档,也是不错的选择。

我决定在源码阅读的学习路线结束之后,再出几篇部署实践的文章,落实理论的学习。这次一定,不鸽。

言归正传,还是先贴上关于Graph相关的结构体的思维导图,graph、node以及tensor。 学过数据结构的同学,从字面上应该都能猜出来,graph是由很多的node组成的,每个node作为各种算子对tensor进行加工和处理,tensor是我们网络的输入和输出,整个的网络是一个graph,而部署在加速设备上,可能会被引擎切开成多个子图,不同的子图在不同的设备上加速效果不同。辛苦Tengine团队了!

image.png
下面放上相关的比较重要的api及其实现代码

Tensor相关

  • init_ir_tensor:初始化tensor的所有属性
  • create_ir_tensor:为graph创建一个名为tensor_name的tensor
  • create_ir_tensor_name_from_index:通过指定id创建tensor名,核心就是return "tensor_" + str(index)
  • get_ir_tensor_index_from_name:通过tensor的名字 查找tensor在graph中的index
  • dump_ir_tensor:调试 输出tensor信息
  • set_ir_tensor_consumer:设置tensor的consumer

    // Populate a freshly created tensor with its default state: identity
    // fields from the arguments, an empty consumer table (all slots -1),
    // neutral scalar fields, a zeroed shape, and NULL pointer members.
    void init_ir_tensor(ir_tensor_t* ir_tensor, int tensor_index, int data_type)
    {
        // identity and element type supplied by the caller
        ir_tensor->index = tensor_index;
        ir_tensor->data_type = data_type;
        ir_tensor->elem_size = get_tenser_element_size(data_type);

        // -1 marks "no producer assigned yet"
        ir_tensor->producer = -1;

        // reserve the fixed-capacity consumer table and mark every slot empty
        ir_tensor->consumer = (int16_t*)sys_malloc(sizeof(int16_t) * TE_MAX_CONSUMER_NUM);
        for (int slot = 0; slot < TE_MAX_CONSUMER_NUM; slot++)
        {
            ir_tensor->consumer[slot] = -1;
        }

        // scalar defaults
        ir_tensor->reshaped = 0;
        ir_tensor->consumer_num = 0;
        ir_tensor->tensor_type = TENSOR_TYPE_VAR;
        ir_tensor->dim_num = 0;
        ir_tensor->subgraph_num = 0;
        ir_tensor->free_host_mem = 0;
        ir_tensor->internal_allocated = 1;
        ir_tensor->layout = TENGINE_LAYOUT_NCHW;
        ir_tensor->quant_param_num = 0;
        ir_tensor->elem_num = 0;

        // clear the whole shape array
        for (int dim = 0; dim < MAX_SHAPE_DIM_NUM; dim++)
        {
            ir_tensor->dims[dim] = 0;
        }

        // no payload, name, quantization lists, device memory or subgraph refs yet
        ir_tensor->data = NULL;
        ir_tensor->name = NULL;
        ir_tensor->scale_list = NULL;
        ir_tensor->zp_list = NULL;
        ir_tensor->dev_mem = NULL;
        ir_tensor->subgraph_list = NULL;
    }
    ​
    // Create a tensor named `tensor_name` (may be NULL) for `ir_graph`
    // and append it to the graph's tensor list; the tensor's id is its
    // position in that list. Returns the new tensor, or NULL on any
    // allocation failure, leaving the graph in a consistent state.
    ir_tensor_t* create_ir_tensor(ir_graph_t* ir_graph, const char* tensor_name, int data_type)
    {
        ir_tensor_t* ir_tensor = (ir_tensor_t*)sys_malloc(sizeof(ir_tensor_t));
        if (NULL == ir_tensor)
        {
            return NULL;
        }

        // the new tensor's id is the current list length
        init_ir_tensor(ir_tensor, ir_graph->tensor_num, data_type);
        // a tensor inherits the layout of the graph it belongs to
        ir_tensor->layout = ir_graph->graph_layout;

        // grow the graph's tensor list by one slot
        ir_tensor_t** new_tensor_list = (ir_tensor_t**)sys_realloc(ir_graph->tensor_list, sizeof(ir_tensor_t*) * (ir_graph->tensor_num + 1));
        if (NULL == new_tensor_list)
        {
            // BUGFIX: also release the consumer table allocated by
            // init_ir_tensor(), which used to leak here
            sys_free(ir_tensor->consumer);
            sys_free(ir_tensor);
            return NULL;
        }
        // BUGFIX: commit the (possibly relocated) list pointer immediately.
        // The old code updated ir_graph->tensor_list only at the very end,
        // so the early return on name-allocation failure below left the
        // graph holding a dangling pointer to the pre-realloc block.
        ir_graph->tensor_list = new_tensor_list;

        // duplicate the optional name into tensor-owned, aligned storage
        if (NULL != tensor_name)
        {
            const int str_length = align((int)strlen(tensor_name) + 1, TE_COMMON_ALIGN_SIZE);
            ir_tensor->name = (char*)sys_malloc(str_length);

            if (NULL == ir_tensor->name)
            {
                sys_free(ir_tensor->consumer); // BUGFIX: leaked before
                sys_free(ir_tensor);
                return NULL;
            }

            memset(ir_tensor->name, 0, str_length);
            strcpy(ir_tensor->name, tensor_name);
        }

        // append the tensor and publish the new count
        ir_graph->tensor_list[ir_graph->tensor_num] = ir_tensor;
        ir_graph->tensor_num++;

        return ir_tensor;
    }
    ​
    // Build a heap-allocated default tensor name of the form
    // "tensor_<index>" (the index is printed right-aligned in 7 columns).
    // The caller owns the returned buffer; returns NULL on allocation failure.
    char* create_ir_tensor_name_from_index(int index)
    {
        char* buffer = (char*)sys_malloc(TE_COMMON_ALIGN_SIZE * 2);
        if (NULL != buffer)
        {
            sprintf(buffer, "tensor_%7d", index);
        }
        return buffer;
    }
    ​
    // Resolve a tensor name to its index in the graph's tensor list.
    // Fast path: auto-generated names shaped like "tensor_<n>" encode
    // their own index, so decode the digits after the last '_' and verify
    // the hit by comparing names. Slow path: linear scan over all tensors.
    // Returns -1 when no tensor carries the given name.
    int get_ir_tensor_index_from_name(ir_graph_t* graph, const char* tensor_name)
    {
        const char* suffix = strrchr(tensor_name, '_');
        if (NULL != suffix)
        {
            const int candidate = atoi(suffix + 1);
            // only trust the decoded index when it is in range
            if (candidate >= 0 && candidate < graph->tensor_num)
            {
                const ir_tensor_t* const hit = graph->tensor_list[candidate];
                // the decoded index only counts if the names really match
                if (NULL != hit->name && 0 == strcmp(hit->name, tensor_name))
                {
                    return candidate;
                }
            }
        }

        // fall back to an exhaustive search
        for (int pos = 0; pos < graph->tensor_num; pos++)
        {
            const ir_tensor_t* const cur = graph->tensor_list[pos];
            if (cur->name && 0 == strcmp(cur->name, tensor_name))
            {
                return pos;
            }
        }

        return -1;
    }
    ​
    // Log a one-line human-readable summary of a tensor: name (or index),
    // data type, tensor type, shape, producer node, and consumer count.
    // Debug helper only; does not modify any state.
    void dump_ir_tensor(ir_graph_t* g, ir_tensor_t* t)
    {
        // data_type: { int8, uint8, fp32, fp16, int32 }
        // tensor_type: { const, input, var, dep }
        if (NULL != t->name)
        {
            TLOG_INFO("%s type: %s/%s", t->name, get_tensor_data_type_string(t->data_type), get_tensor_type_string(t->tensor_type));
        }
        else
        {
            // unnamed tensors are identified by their index
            TLOG_INFO("tensor_%d type: %s/%s", t->index, get_tensor_data_type_string(t->data_type),
                      get_tensor_type_string(t->tensor_type));
        }

        // print the shape, e.g. " shape: [1,3,224,224]"
        if (0 < t->dim_num)
        {
            // BUGFIX: the old 64-byte buffer was filled with unbounded
            // sprintf and could overflow for many/large dimensions; use
            // snprintf with explicit remaining-space accounting instead.
            char shape_buf[128];
            int written = snprintf(shape_buf, sizeof(shape_buf), " shape: [");
            size_t used = (written > 0) ? (size_t)written : 0;

            for (int i = 0; i < t->dim_num && used < sizeof(shape_buf) - 1; i++)
            {
                written = snprintf(shape_buf + used, sizeof(shape_buf) - used, "%d%s",
                                   t->dims[i], (i == t->dim_num - 1) ? "]" : ",");
                if (written < 0)
                    break;
                used += (size_t)written;
                if (used >= sizeof(shape_buf))
                    used = sizeof(shape_buf) - 1; // truncated; buffer stays NUL-terminated
            }

            TLOG_INFO("%s", shape_buf);
        }
        else
        {
            TLOG_INFO(" shape: []");
        }

        // producer node, if any (-1 means unset)
        if (0 <= t->producer)
        {
            ir_node_t* node = g->node_list[t->producer];

            TLOG_INFO(" from node: %d", node->index);
        }

        if (t->consumer_num > 0)
            TLOG_INFO(" (consumer: %d)", t->consumer_num);

        TLOG_INFO("\n");
    }
    ​
    // Record node `index` as one more consumer of `ir_tensor`.
    // The consumer table starts with TE_MAX_CONSUMER_NUM preallocated
    // slots; once that capacity is used up, each additional consumer grows
    // the table by exactly one slot. Returns 0 on success, -1 on OOM.
    int set_ir_tensor_consumer(ir_tensor_t* ir_tensor, const int index)
    {
        const int count = ir_tensor->consumer_num;

        // out of preallocated slots? grow the table by one entry
        if (count >= TE_MAX_CONSUMER_NUM)
        {
            int16_t* grown = (int16_t*)sys_realloc(ir_tensor->consumer, sizeof(int16_t) * (count + 1));
            if (NULL == grown)
            {
                return -1;
            }
            ir_tensor->consumer = grown;
        }

        // append the new consumer id and bump the count
        ir_tensor->consumer[count] = index;
        ir_tensor->consumer_num = count + 1;

        return 0;
    }

    Node相关

  • init_ir_node:初始化node的属性
  • create_ir_node:给graph创建名为node_name的节点
  • set_ir_node_input_tensor:设置node的第index个输入tensor
// Give a freshly allocated node its default state: no connected tensors,
// interior node type, operator identity taken from the arguments, and no
// operator parameters or shape-inference hook yet.
static void init_ir_node(ir_node_t* ir_node, int op_type, int op_version, int node_index)
{
    // identity within the graph
    ir_node->index = node_index;
    ir_node->subgraph_idx = -1; // not assigned to any subgraph yet
    ir_node->node_type = TE_NODE_TYPE_INTER;
    ir_node->dynamic_shape = 0;
    ir_node->name = NULL;

    // no input/output tensors wired up yet
    ir_node->input_num = 0;
    ir_node->output_num = 0;
    ir_node->input_tensors = NULL;
    ir_node->output_tensors = NULL;

    // operator descriptor
    ir_node->op.type = op_type;
    ir_node->op.version = op_version;
    ir_node->op.same_shape = 1; // assume shape-preserving by default
    ir_node->op.param_size = 0;
    ir_node->op.param_mem = NULL;
    ir_node->op.infer_shape = NULL;
}
​
// 给graph创建名为node_name的节点
ir_node_t* create_ir_node(struct graph* ir_graph, const char* node_name, int op_type, int op_version)
{
    // 为node分配内存空间
    ir_node_t* node = (ir_node_t*)sys_malloc(sizeof(ir_node_t));
    if (NULL == node)
    {
        return NULL;
    }
    // 初始化node属性,节点id为graph的节点个数
    init_ir_node(node, op_type, op_version, ir_graph->node_num);
    // 根据op类型找到method
    ir_method_t* method = find_op_method(op_type, op_version);
    
    if ((NULL != method) && (NULL != method->init) && (method->init(&node->op) < 0))    ?????????
    {
        sys_free(node);
        return NULL;
    }
    
    if ((NULL == method) || (NULL == method->init) || (method->init(&node->op) < 0)) 
    {
        sys_free(node);
        return NULL;
    }
    // 为graph的node列表重新分配更大的内存空间,之前的node列表重新填充进去
    ir_node_t** new_node_list = (ir_node_t**)sys_realloc(ir_graph->node_list, sizeof(ir_node_t*) * (ir_graph->node_num + 1));
​
    if (NULL == new_node_list)
    {
        return NULL;
    }
   // node的相关graph指向当前指定的graph
    node->graph = ir_graph;
    // 如果指定了node name, 则为node->name开辟空间,将node_name拷贝进去
    if (NULL != node_name)
    {
        node->name = strdup(node_name);
    }
  // 新创建的node列表的最后一位设置为新创建的节点
    new_node_list[ir_graph->node_num] = node;
    // graph的node列表指向新的节点列表,node的数量加1
    ir_graph->node_list = new_node_list;
    ir_graph->node_num++;
​
    return node;
}
​
// Wire `tensor` in as input slot `input_idx` of `node`, growing the
// node's input-tensor index table when that slot does not exist yet, and
// register the node as a consumer of the tensor.
// Returns 0 on success, -1 on allocation failure.
int set_ir_node_input_tensor(ir_node_t* node, int input_idx, ir_tensor_t* tensor)
{
    // grow the index table if the requested slot is beyond the current size
    if (input_idx >= node->input_num)
    {
        int16_t* grown = (int16_t*)sys_realloc(node->input_tensors, sizeof(int16_t) * (input_idx + 1));

        if (NULL == grown)
        {
            return -1;
        }

        // newly created slots are marked unused with -1
        for (int slot = node->input_num; slot <= input_idx; slot++)
        {
            grown[slot] = -1;
        }

        node->input_tensors = (uint16_t*)grown;
        node->input_num = input_idx + 1;
    }

    // store the tensor's id in the requested slot
    node->input_tensors[input_idx] = tensor->index;

    // the node now consumes this tensor
    if (set_ir_tensor_consumer(tensor, node->index) < 0)
    {
        return -1;
    }
    return 0;
}

Graph相关

  • create_ir_graph:创建graph,context参数与device相关
  • init_ir_graph:初始化graph的属性
  • set_ir_graph_input_node:设置graph的输入node的index列表
  • get_ir_graph_tensor:返回graph的第index个tensor
  • get_ir_graph_node:返回graph的第index个node
  • get_ir_graph_subgraph:返回graph的第index个subgraph
  • infer_ir_graph_shape:按node列表顺序推断graph中各node输出tensor的shape(下方代码实际展示的是这个函数,而非dump_ir_graph)

    // Allocate and initialize a new graph. `context` carries device
    // information, so a non-NULL context must be supplied when targeting
    // non-CPU devices. Returns NULL if any allocation fails.
    ir_graph_t* create_ir_graph(struct context* context)
    {
        ir_graph_t* ir_graph = (ir_graph_t*)sys_malloc(sizeof(ir_graph_t));
        if (NULL == ir_graph)
        {
            return NULL;
        }

        ir_graph->attribute = (struct attribute*)sys_malloc(sizeof(struct attribute));
        // BUGFIX: this allocation was never checked; init_ir_graph() hands
        // the attribute block to init_attribute(), which would dereference
        // NULL on out-of-memory.
        if (NULL == ir_graph->attribute)
        {
            sys_free(ir_graph);
            return NULL;
        }

        init_ir_graph(ir_graph, context);

        return ir_graph;
    }
    ​
    // Reset every field of a freshly allocated graph to its default:
    // empty tensor/node/input/output lists, an empty subgraph vector,
    // NCHW layout, Tengine model format, no serializer or device bound,
    // CREATED status, and an attribute block initialized from `context`.
    void init_ir_graph(ir_graph_t* graph, struct context* context)
    {
        // no tensors or nodes yet
        graph->tensor_list = NULL;
        graph->tensor_num = 0;
        graph->node_list = NULL;
        graph->node_num = 0;

        // no declared graph inputs/outputs yet
        graph->input_nodes = NULL;
        graph->input_num = 0;
        graph->output_nodes = NULL;
        graph->output_num = 0;

        // subgraphs live in a growable vector of pointers
        graph->subgraph_list = create_vector(sizeof(struct subgraph*), NULL);

        // layout and model-format defaults
        graph->graph_layout = TENGINE_LAYOUT_NCHW;
        graph->model_layout = TENGINE_LAYOUT_NCHW;
        graph->model_format = MODEL_FORMAT_TENGINE;

        // nothing loaded or bound yet
        graph->serializer = NULL;
        graph->serializer_privacy = NULL;
        graph->device = NULL;
        graph->device_privacy = NULL;

        graph->status = GRAPH_STAT_CREATED;

        // the attribute block carries context/device settings
        init_attribute(graph->attribute, context);
    }
    ​
    // Declare which nodes are the graph's inputs, replacing any previous
    // declaration. Every listed node is re-tagged as an input-type node.
    // Returns 0 on success, -1 on a non-positive count or allocation failure.
    int set_ir_graph_input_node(ir_graph_t* graph, int16_t input_nodes[], int input_number)
    {
        if (input_number <= 0)
        {
            return -1;
        }

        // allocate the replacement index list first, so a failure here
        // leaves the existing declaration untouched
        int16_t* fresh_list = (int16_t*)sys_malloc(input_number * sizeof(int16_t));
        if (NULL == fresh_list)
        {
            return -1;
        }

        // drop the previous declaration, if any
        if (NULL != graph->input_nodes)
        {
            sys_free(graph->input_nodes);
            graph->input_nodes = NULL;
        }

        graph->input_nodes = fresh_list;
        graph->input_num = input_number;

        // copy the indices and mark each referenced node as an input node
        for (int pos = 0; pos < input_number; pos++)
        {
            ir_node_t* node = get_ir_graph_node(graph, input_nodes[pos]);
            node->node_type = TE_NODE_TYPE_INPUT;
            graph->input_nodes[pos] = input_nodes[pos];
        }

        return 0;
    }
    ​
    // Fetch the tensor stored at position `index` in the graph's tensor
    // list. No bounds checking; the caller must pass a valid index.
    struct tensor* get_ir_graph_tensor(ir_graph_t* graph, int index)
    {
        struct tensor* tensor = graph->tensor_list[index];
        return tensor;
    }
    ​
    // Fetch the node stored at position `index` in the graph's node list.
    // No bounds checking; the caller must pass a valid index.
    struct node* get_ir_graph_node(ir_graph_t* graph, int index)
    {
        struct node* node = graph->node_list[index];
        return node;
    }
    ​
    // Fetch the subgraph stored at position `index` in the graph's
    // subgraph vector. The vector stores subgraph pointers, so the stored
    // element must be dereferenced once. No bounds checking is performed.
    struct subgraph* get_ir_graph_subgraph(ir_graph_t* graph, int index)
    {
        struct subgraph** slot = (struct subgraph**)get_vector_data(graph->subgraph_list, index);
        return *slot;
    }
    ​
    ​
    // Run shape inference over every node of the graph in list order.
    // Nodes flagged dynamic skip inference and instead propagate the
    // dynamic flag to every consumer of their outputs; shape-preserving
    // ops copy the first input's shape to the first output; all other ops
    // invoke their registered infer_shape hook.
    // Returns 0 on success, -1 when any node's inference fails.
    int infer_ir_graph_shape(ir_graph_t* graph)
    {
        const int node_num = graph->node_num;

        for (int i = 0; i < node_num; i++)
        {
            ir_node_t* node = get_ir_graph_node(graph, i);
            ir_op_t* op = &node->op;

            // nodes without inputs (constants, placeholders) need no inference
            if (node->input_num == 0)
                continue;

            if (node->dynamic_shape)
            {
                // propagate the dynamic flag to every consumer of every output.
                // BUGFIX: the old code indexed graph->tensor_list with the raw
                // loop counter j (instead of node->output_tensors[j]) and
                // graph->node_list with l (instead of tensor->consumer[l]),
                // so the flag landed on unrelated tensors/nodes.
                for (int j = 0; j < node->output_num; j++)
                {
                    ir_tensor_t* tensor = get_ir_graph_tensor(graph, node->output_tensors[j]);

                    for (int l = 0; l < tensor->consumer_num; l++)
                    {
                        ir_node_t* child_node = get_ir_graph_node(graph, tensor->consumer[l]);
                        child_node->dynamic_shape = 1;
                    }
                }

                continue;
            }

            if (0 != op->same_shape)
            {
                // shape-preserving op: copy dims from first input to first output
                ir_tensor_t* input = get_ir_graph_tensor(graph, node->input_tensors[0]);
                ir_tensor_t* output = get_ir_graph_tensor(graph, node->output_tensors[0]);

                output->dim_num = input->dim_num;
                output->elem_num = input->elem_num;

                memcpy(output->dims, input->dims, sizeof(int32_t) * input->dim_num);
            }
            else
            {
                // BUGFIX: guard against a missing infer_shape hook instead of
                // calling through a NULL function pointer (undefined behavior)
                if (NULL == op->infer_shape || 0 != op->infer_shape(node))
                {
                    TLOG_ERR("Tengine FATAL: Infer node(id: %d, op: %s) shape failed.\n", node->index,
                             get_op_name_from_type(node->op.type));
                    return -1;
                }
            }

            // inference done: clear the reshaped flag on every output
            for (int j = 0; j < node->output_num; j++)
            {
                ir_tensor_t* tensor = get_ir_graph_tensor(graph, node->output_tensors[j]);

                tensor->reshaped = 0;
            }
        }

        return 0;
    }
原文链接:https://zhuanlan.zhihu.com/p/399854346
作者:闪电侠的右手

推荐阅读

推荐阅读
关注数
3391
内容数
68
Tengine是一款轻量级模块化高性能的神经网络推理引擎 ;欢迎体验Tengine,[链接] 《Tengine开发者入门资料包》[链接]
目录
极术微信服务号
关注极术微信号
实时接收点赞提醒和评论通知
安谋科技学堂公众号
关注安谋科技学堂
实时获取安谋科技及 Arm 教学资源
安谋科技招聘公众号
关注安谋科技招聘
实时获取安谋科技中国职位信息